//===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the OpenMPIRBuilder class, which is used as a
/// convenient way to create LLVM instructions for OpenMP directives.
///
//===----------------------------------------------------------------------===//

#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"

#include <sstream>

#define DEBUG_TYPE "openmp-ir-builder"

using namespace llvm;
using namespace omp;

// When set, runtime calls are annotated with attributes describing their
// "as-if" behavior rather than their exact implementation, enabling more
// aggressive optimization around them.
static cl::opt<bool>
    OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
                         cl::desc("Use optimistic attributes describing "
                                  "'as-if' properties of runtime calls."),
                         cl::init(false));

/// Attach the statically known attribute sets for the OpenMP runtime function
/// \p FnID to the declaration \p Fn. The attribute tables are generated from
/// OMPKinds.def; functions without an OMP_RTL_ATTRS entry are left untouched.
void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
  LLVMContext &Ctx = Fn.getContext();

// Materialize the named attribute sets declared in OMPKinds.def as locals so
// the OMP_RTL_ATTRS expansions below can refer to them by name.
#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

  // Add attributes to the new declaration.
  switch (FnID) {
#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets)                \
  case Enum:                                                                   \
    Fn.setAttributes(                                                          \
        AttributeList::get(Ctx, FnAttrSet, RetAttrSet, ArgAttrSets));          \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  default:
    // Attributes are optional.
    break;
  }
}

/// Return a callee for the OpenMP runtime function \p FnID, creating a new
/// declaration in \p M if none exists yet. If an existing declaration has a
/// mismatching type it is bitcast to the expected function pointer type.
FunctionCallee
OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M, RuntimeFunction FnID) {
  FunctionType *FnTy = nullptr;
  Function *Fn = nullptr;

  // Try to find the declaration in the module first.
  switch (FnID) {
#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...)                          \
  case Enum:                                                                   \
    FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__},        \
                             IsVarArg);                                        \
    Fn = M.getFunction(Str);                                                   \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  }

  if (!Fn) {
    // Create a new declaration if we need one.
    switch (FnID) {
#define OMP_RTL(Enum, Str, ...)                                                \
  case Enum:                                                                   \
    Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M);         \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
    }

    // Add information if the runtime function takes a callback function
    if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
      if (!Fn->hasMetadata(LLVMContext::MD_callback)) {
        LLVMContext &Ctx = Fn->getContext();
        MDBuilder MDB(Ctx);
        // Annotate the callback behavior of the runtime function:
        // - The callback callee is argument number 2 (microtask).
        // - The first two arguments of the callback callee are unknown (-1).
        // - All variadic arguments to the runtime function are passed to the
        //   callback callee.
        Fn->addMetadata(
            LLVMContext::MD_callback,
            *MDNode::get(Ctx, {MDB.createCallbackEncoding(
                                  2, {-1, -1}, /* VarArgsArePassed */ true)}));
      }
    }

    LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName()
                      << " with type " << *Fn->getFunctionType() << "\n");
    addAttributes(FnID, *Fn);

  } else {
    LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName()
                      << " with type " << *Fn->getFunctionType() << "\n");
  }

  assert(Fn && "Failed to create OpenMP runtime function");

  // Cast the function to the expected type if necessary
  Constant *C = ConstantExpr::getBitCast(Fn, FnTy->getPointerTo());
  return {FnTy, C};
}

/// Convenience wrapper around getOrCreateRuntimeFunction that returns the
/// declaration as a Function*. Asserts that no bitcast was required, i.e.,
/// the declaration already has the expected type.
Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
  FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID);
  auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee());
  assert(Fn && "Failed to create OpenMP runtime function pointer");
  return Fn;
}

void OpenMPIRBuilder::initialize() { initializeTypes(M); }

/// Outline all regions registered via addOutlineInfo that belong to \p Fn
/// (or all registered regions if \p Fn is null). Regions belonging to other
/// functions are kept for a later finalize call.
void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) {
  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
  SmallVector<BasicBlock *, 32> Blocks;
  SmallVector<OutlineInfo, 16> DeferredOutlines;
  for (OutlineInfo &OI : OutlineInfos) {
    // Skip functions that have not finalized yet; may happen with nested
    // function generation.
    if (Fn && OI.getFunction() != Fn) {
      DeferredOutlines.push_back(OI);
      continue;
    }

    ParallelRegionBlockSet.clear();
    Blocks.clear();
    OI.collectBlocks(ParallelRegionBlockSet, Blocks);

    Function *OuterFn = OI.getFunction();
    CodeExtractorAnalysisCache CEAC(*OuterFn);
    CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
                            /* AggregateArgs */ false,
                            /* BlockFrequencyInfo */ nullptr,
                            /* BranchProbabilityInfo */ nullptr,
                            /* AssumptionCache */ nullptr,
                            /* AllowVarArgs */ true,
                            /* AllowAlloca */ true,
                            /* Suffix */ ".omp_par");

    LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
    LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
                      << " Exit: " << OI.ExitBB->getName() << "\n");
    assert(Extractor.isEligible() &&
           "Expected OpenMP outlining to be possible!");

    Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);

    LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n");
    LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
    assert(OutlinedFn->getReturnType()->isVoidTy() &&
           "OpenMP outlined functions should not return a value!");

    // For compatibility with the clang CG we move the outlined function after
    // the one with the parallel region.
    OutlinedFn->removeFromParent();
    M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);

    // Remove the artificial entry introduced by the extractor right away, we
    // made our own entry block after all.
    {
      BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
      assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
      assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
      if (AllowExtractorSinking) {
        // Move instructions from the to-be-deleted ArtificialEntry to the
        // entry basic block of the parallel region. CodeExtractor may have
        // sunk allocas/bitcasts for values that are solely used in the
        // outlined region and do not escape.
        assert(!ArtificialEntry.empty() &&
               "Expected instructions to sink in the outlined region");
        for (BasicBlock::iterator It = ArtificialEntry.begin(),
                                  End = ArtificialEntry.end();
             It != End;) {
          Instruction &I = *It;
          It++;

          // The branch to OI.EntryBB stays with the block to be erased.
          if (I.isTerminator())
            continue;

          I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
        }
      }
      OI.EntryBB->moveBefore(&ArtificialEntry);
      ArtificialEntry.eraseFromParent();
    }
    assert(&OutlinedFn->getEntryBlock() == OI.EntryBB);
    assert(OutlinedFn && OutlinedFn->getNumUses() == 1);

    // Run a user callback, e.g. to add attributes.
    if (OI.PostOutlineCB)
      OI.PostOutlineCB(*OutlinedFn);
  }

  // Remove work items that have been completed.
  OutlineInfos = std::move(DeferredOutlines);
}

OpenMPIRBuilder::~OpenMPIRBuilder() {
  assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
}

/// Return an ident_t* for the source location string \p SrcLocStr with the
/// given flags. Idents are uniqued per (location, flags) pair via IdentMap.
Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
                                         IdentFlag LocFlags,
                                         unsigned Reserve2Flags) {
  // Enable "C-mode".
  LocFlags |= OMP_IDENT_FLAG_KMPC;

  Value *&Ident =
      IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}];
  if (!Ident) {
    Constant *I32Null = ConstantInt::getNullValue(Int32);
    Constant *IdentData[] = {
        I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)),
        ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr};
    Constant *Initializer = ConstantStruct::get(
        cast<StructType>(IdentPtr->getPointerElementType()), IdentData);

    // Look for existing encoding of the location + flags, not needed but
    // minimizes the difference to the existing solution while we transition.
    for (GlobalVariable &GV : M.getGlobalList())
      if (GV.getType() == IdentPtr && GV.hasInitializer())
        if (GV.getInitializer() == Initializer)
          return Ident = &GV;

    auto *GV = new GlobalVariable(M, IdentPtr->getPointerElementType(),
                                  /* isConstant = */ true,
                                  GlobalValue::PrivateLinkage, Initializer);
    GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    GV->setAlignment(Align(8));
    Ident = GV;
  }
  return Builder.CreatePointerCast(Ident, IdentPtr);
}

/// Return the integer type used for lane masks on the module's target.
Type *OpenMPIRBuilder::getLanemaskType() {
  LLVMContext &Ctx = M.getContext();
  Triple triple(M.getTargetTriple());

  // This test is adequate until deviceRTL has finer grained lane widths
  return triple.isAMDGCN() ? Type::getInt64Ty(Ctx) : Type::getInt32Ty(Ctx);
}

/// Return an i8* pointing at a (uniqued) global string holding \p LocStr.
Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
  Constant *&SrcLocStr = SrcLocStrMap[LocStr];
  if (!SrcLocStr) {
    Constant *Initializer =
        ConstantDataArray::getString(M.getContext(), LocStr);

    // Look for existing encoding of the location, not needed but minimizes the
    // difference to the existing solution while we transition.
    for (GlobalVariable &GV : M.getGlobalList())
      if (GV.isConstant() && GV.hasInitializer() &&
          GV.getInitializer() == Initializer)
        return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);

    SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "",
                                              /* AddressSpace */ 0, &M);
  }
  return SrcLocStr;
}

/// Build the ";file;function;line;column;;" location string expected by the
/// OpenMP runtime and return a pointer to its uniqued global.
Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName,
                                                StringRef FileName,
                                                unsigned Line,
                                                unsigned Column) {
  SmallString<128> Buffer;
  Buffer.push_back(';');
  Buffer.append(FileName);
  Buffer.push_back(';');
  Buffer.append(FunctionName);
  Buffer.push_back(';');
  Buffer.append(std::to_string(Line));
  Buffer.push_back(';');
  Buffer.append(std::to_string(Column));
  Buffer.push_back(';');
  Buffer.push_back(';');
  return getOrCreateSrcLocStr(Buffer.str());
}

/// Return the "unknown location" string used when no debug info is present.
Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() {
  return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
}

/// Derive a source location string from the debug location attached to
/// \p Loc, falling back to the default "unknown" string if there is none.
Constant *
OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
  DILocation *DIL = Loc.DL.get();
  if (!DIL)
    return getOrCreateDefaultSrcLocStr();
  StringRef FileName = M.getName();
  if (DIFile *DIF = DIL->getFile())
    if (Optional<StringRef> Source = DIF->getSource())
      FileName = *Source;
  StringRef Function = DIL->getScope()->getSubprogram()->getName();
  // Fall back to the IR function name if the subprogram has no name.
  Function =
      !Function.empty() ?
          Function : Loc.IP.getBlock()->getParent()->getName();
  return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
                              DIL->getColumn());
}

/// Emit a call to __kmpc_global_thread_num for \p Ident and return the
/// resulting thread-id value.
Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
  return Builder.CreateCall(
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
      "omp_global_thread_num");
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK,
                               bool ForceSimpleCall, bool CheckCancelFlag) {
  // Bail out, leaving the insertion point untouched, if the location is
  // unusable.
  if (!updateToLocation(Loc))
    return Loc.IP;
  return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
                                 bool ForceSimpleCall, bool CheckCancelFlag) {
  // Build call __kmpc_cancel_barrier(loc, thread_id) or
  //            __kmpc_barrier(loc, thread_id);

  // Encode in the ident flags which directive the (implicit) barrier
  // belongs to.
  IdentFlag BarrierLocFlags;
  switch (Kind) {
  case OMPD_for:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
    break;
  case OMPD_sections:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
    break;
  case OMPD_single:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
    break;
  case OMPD_barrier:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
    break;
  default:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
    break;
  }

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags),
                   getOrCreateThreadID(getOrCreateIdent(SrcLocStr))};

  // If we are in a cancellable parallel region, barriers are cancellation
  // points.
  // TODO: Check why we would force simple calls or to ignore the cancel flag.
  bool UseCancelBarrier =
      !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);

  Value *Result =
      Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
                             UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
                                              : OMPRTL___kmpc_barrier),
                         Args);

  if (UseCancelBarrier && CheckCancelFlag)
    emitCancelationCheckImpl(Result, OMPD_parallel);

  return Builder.saveIP();
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
                              Value *IfCondition,
                              omp::Directive CanceledDirective) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  // LLVM utilities like blocks with terminators.
  auto *UI = Builder.CreateUnreachable();

  // With an if-condition, the cancel call only happens on the "then" path.
  Instruction *ThenTI = UI, *ElseTI = nullptr;
  if (IfCondition)
    SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
  Builder.SetInsertPoint(ThenTI);

  Value *CancelKind = nullptr;
  switch (CanceledDirective) {
#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value)                       \
  case DirectiveEnum:                                                          \
    CancelKind = Builder.getInt32(Value);                                      \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  default:
    llvm_unreachable("Unknown cancel kind!");
  }

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
  Value *Result = Builder.CreateCall(
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);

  // The actual cancel logic is shared with others, e.g., cancel_barriers.
  emitCancelationCheckImpl(Result, CanceledDirective);

  // Update the insertion point and remove the terminator we introduced.
  Builder.SetInsertPoint(UI->getParent());
  UI->eraseFromParent();

  return Builder.saveIP();
}

/// Branch on \p CancelFlag: continue normally when it is zero, otherwise run
/// the innermost finalization callback on a dedicated cancellation block.
void OpenMPIRBuilder::emitCancelationCheckImpl(
    Value *CancelFlag, omp::Directive CanceledDirective) {
  assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
         "Unexpected cancellation!");

  // For a cancel barrier we create two new blocks.
  BasicBlock *BB = Builder.GetInsertBlock();
  BasicBlock *NonCancellationBlock;
  if (Builder.GetInsertPoint() == BB->end()) {
    // TODO: This branch will not be needed once we moved to the
    // OpenMPIRBuilder codegen completely.
    NonCancellationBlock = BasicBlock::Create(
        BB->getContext(), BB->getName() + ".cont", BB->getParent());
  } else {
    // Split at the insertion point and re-emit the branch ourselves below.
    NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
    BB->getTerminator()->eraseFromParent();
    Builder.SetInsertPoint(BB);
  }
  BasicBlock *CancellationBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".cncl", BB->getParent());

  // Jump to them based on the return value.
  Value *Cmp = Builder.CreateIsNull(CancelFlag);
  Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
                       /* TODO weight */ nullptr, nullptr);

  // From the cancellation block we finalize all variables and go to the
  // post finalization block that is known to the FiniCB callback.
  Builder.SetInsertPoint(CancellationBlock);
  auto &FI = FinalizationStack.back();
  FI.FiniCB(Builder.saveIP());

  // The continuation block is where code generation continues.
  Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
}

/// Emit a parallel region: runtime setup calls, region block structure, user
/// body/privatization/finalization callbacks, and registration of the region
/// for outlining (the fork_call is placed in PostOutlineCB at finalize time).
IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
    const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
    BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
    FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
    omp::ProcBindKind ProcBind, bool IsCancellable) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadID = getOrCreateThreadID(Ident);

  // Honor an explicit num_threads clause, if present.
  if (NumThreads) {
    // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
    Value *Args[] = {
        Ident, ThreadID,
        Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
    Builder.CreateCall(
        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
  }

  // Honor an explicit proc_bind clause, if present.
  if (ProcBind != OMP_PROC_BIND_default) {
    // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
    Value *Args[] = {
        Ident, ThreadID,
        ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
    Builder.CreateCall(
        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
  }

  BasicBlock *InsertBB = Builder.GetInsertBlock();
  Function *OuterFn = InsertBB->getParent();

  // Save the outer alloca block because the insertion iterator may get
  // invalidated and we still need this later.
  BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();

  // Vector to remember instructions we used only during the modeling but which
  // we want to delete at the end.
  SmallVector<Instruction *, 4> ToBeDeleted;

  // Change the location to the outer alloca insertion point to create and
  // initialize the allocas we pass into the parallel region.
  Builder.restoreIP(OuterAllocaIP);
  AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
  AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");

  // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
  // program, otherwise we only need them for modeling purposes to get the
  // associated arguments in the outlined function. In the former case,
  // initialize the allocas properly, in the latter case, delete them later.
  if (IfCondition) {
    Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
    Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
  } else {
    ToBeDeleted.push_back(TIDAddr);
    ToBeDeleted.push_back(ZeroAddr);
  }

  // Create an artificial insertion point that will also ensure the blocks we
  // are about to split are not degenerated.
  auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);

  Instruction *ThenTI = UI, *ElseTI = nullptr;
  if (IfCondition)
    SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);

  // Carve the parallel region out of the current block; see the diagram below
  // for the resulting block structure.
  BasicBlock *ThenBB = ThenTI->getParent();
  BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
  BasicBlock *PRegBodyBB =
      PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
  BasicBlock *PRegPreFiniBB =
      PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
  BasicBlock *PRegExitBB =
      PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");

  auto FiniCBWrapper = [&](InsertPointTy IP) {
    // Hide "open-ended" blocks from the given FiniCB by setting the right jump
    // target to the region exit block.
    if (IP.getBlock()->end() == IP.getPoint()) {
      IRBuilder<>::InsertPointGuard IPG(Builder);
      Builder.restoreIP(IP);
      Instruction *I = Builder.CreateBr(PRegExitBB);
      IP = InsertPointTy(I->getParent(), I->getIterator());
    }
    assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
           IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
           "Unexpected insertion point for finalization call!");
    return FiniCB(IP);
  };

  FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});

  // Generate the privatization allocas in the block that will become the entry
  // of the outlined function.
  Builder.SetInsertPoint(PRegEntryBB->getTerminator());
  InsertPointTy InnerAllocaIP = Builder.saveIP();

  AllocaInst *PrivTIDAddr =
      Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
  Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid");

  // Add some fake uses for OpenMP provided arguments.
  ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use"));
  Instruction *ZeroAddrUse = Builder.CreateLoad(ZeroAddr, "zero.addr.use");
  ToBeDeleted.push_back(ZeroAddrUse);

  // ThenBB
  //   |
  //   V
  // PRegionEntryBB         <- Privatization allocas are placed here.
  //   |
  //   V
  // PRegionBodyBB          <- BodyGen is invoked here.
  //   |
  //   V
  // PRegPreFiniBB          <- The block we will start finalization from.
  //   |
  //   V
  // PRegionExitBB          <- A common exit to simplify block collection.
  //

  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");

  // Let the caller create the body.
  assert(BodyGenCB && "Expected body generation callback!");
  InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
  BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB);

  LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");

  FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
    if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
      llvm::LLVMContext &Ctx = F->getContext();
      MDBuilder MDB(Ctx);
      // Annotate the callback behavior of the __kmpc_fork_call:
      // - The callback callee is argument number 2 (microtask).
      // - The first two arguments of the callback callee are unknown (-1).
      // - All variadic arguments to the __kmpc_fork_call are passed to the
      //   callback callee.
      F->addMetadata(
          llvm::LLVMContext::MD_callback,
          *llvm::MDNode::get(
              Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
                                               /* VarArgsArePassed */ true)}));
    }
  }

  // The actual fork_call (or serialized fallback) is emitted once the region
  // has been outlined, i.e., from finalize() via this callback.
  OutlineInfo OI;
  OI.PostOutlineCB = [=](Function &OutlinedFn) {
    // Add some known attributes.
    OutlinedFn.addParamAttr(0, Attribute::NoAlias);
    OutlinedFn.addParamAttr(1, Attribute::NoAlias);
    OutlinedFn.addFnAttr(Attribute::NoUnwind);
    OutlinedFn.addFnAttr(Attribute::NoRecurse);

    assert(OutlinedFn.arg_size() >= 2 &&
           "Expected at least tid and bounded tid as arguments");
    unsigned NumCapturedVars =
        OutlinedFn.arg_size() - /* tid & bounded tid */ 2;

    CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
    CI->getParent()->setName("omp_parallel");
    Builder.SetInsertPoint(CI);

    // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
    Value *ForkCallArgs[] = {
        Ident, Builder.getInt32(NumCapturedVars),
        Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};

    SmallVector<Value *, 16> RealArgs;
    RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
    RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());

    Builder.CreateCall(RTLFn, RealArgs);

    LLVM_DEBUG(dbgs() << "With fork_call placed: "
                      << *Builder.GetInsertBlock()->getParent() << "\n");

    InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());

    // Initialize the local TID stack location with the argument value.
    Builder.SetInsertPoint(PrivTID);
    Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
    Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);

    // If no "if" clause was present we do not need the call created during
    // outlining, otherwise we reuse it in the serialized parallel region.
    if (!ElseTI) {
      CI->eraseFromParent();
    } else {

      // If an "if" clause was present we are now generating the serialized
      // version into the "else" branch.
      Builder.SetInsertPoint(ElseTI);

      // Build calls __kmpc_serialized_parallel(&Ident, GTid);
      Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
      Builder.CreateCall(
          getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel),
          SerializedParallelCallArgs);

      // OutlinedFn(&gtid, &zero, CapturedStruct);
      CI->removeFromParent();
      Builder.Insert(CI);

      // __kmpc_end_serialized_parallel(&Ident, GTid);
      Value *EndArgs[] = {Ident, ThreadID};
      Builder.CreateCall(
          getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel),
          EndArgs);

      LLVM_DEBUG(dbgs() << "With serialized parallel region: "
                        << *Builder.GetInsertBlock()->getParent() << "\n");
    }

    for (Instruction *I : ToBeDeleted)
      I->eraseFromParent();
  };

  // Adjust the finalization stack, verify the adjustment, and call the
  // finalize function a last time to finalize values between the pre-fini
  // block and the exit block if we left the parallel "the normal way".
  auto FiniInfo = FinalizationStack.pop_back_val();
  (void)FiniInfo;
  assert(FiniInfo.DK == OMPD_parallel &&
         "Unexpected finalization stack state!");

  Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();

  InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
  FiniCB(PreFiniIP);

  OI.EntryBB = PRegEntryBB;
  OI.ExitBB = PRegExitBB;

  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
  SmallVector<BasicBlock *, 32> Blocks;
  OI.collectBlocks(ParallelRegionBlockSet, Blocks);

  // Ensure a single exit node for the outlined region by creating one.
  // We might have multiple incoming edges to the exit now due to finalizations,
  // e.g., cancel calls that cause the control flow to leave the region.
  BasicBlock *PRegOutlinedExitBB = PRegExitBB;
  PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
  PRegOutlinedExitBB->setName("omp.par.outlined.exit");
  Blocks.push_back(PRegOutlinedExitBB);

  CodeExtractorAnalysisCache CEAC(*OuterFn);
  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
                          /* AggregateArgs */ false,
                          /* BlockFrequencyInfo */ nullptr,
                          /* BranchProbabilityInfo */ nullptr,
                          /* AssumptionCache */ nullptr,
                          /* AllowVarArgs */ true,
                          /* AllowAlloca */ true,
                          /* Suffix */ ".omp_par");

  // Find inputs to, outputs from the code region.
  BasicBlock *CommonExit = nullptr;
  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);

  LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");

  FunctionCallee TIDRTLFn =
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);

  // Privatize a single captured value V: wrap non-pointer inputs into stack
  // slots, let PrivCB create/copy the private version, and redirect all uses
  // inside the region to the replacement.
  auto PrivHelper = [&](Value &V) {
    // The modeling allocas are handled separately and never privatized.
    if (&V == TIDAddr || &V == ZeroAddr)
      return;

    SetVector<Use *> Uses;
    for (Use &U : V.uses())
      if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
        if (ParallelRegionBlockSet.count(UserI->getParent()))
          Uses.insert(&U);

    // __kmpc_fork_call expects extra arguments as pointers. If the input
    // already has a pointer type, everything is fine. Otherwise, store the
    // value onto stack and load it back inside the to-be-outlined region. This
    // will ensure only the pointer will be passed to the function.
    // FIXME: if there are more than 15 trailing arguments, they must be
    // additionally packed in a struct.
    Value *Inner = &V;
    if (!V.getType()->isPointerTy()) {
      IRBuilder<>::InsertPointGuard Guard(Builder);
      LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");

      Builder.restoreIP(OuterAllocaIP);
      Value *Ptr =
          Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");

      // Store to stack at end of the block that currently branches to the entry
      // block of the to-be-outlined region.
      Builder.SetInsertPoint(InsertBB,
                             InsertBB->getTerminator()->getIterator());
      Builder.CreateStore(&V, Ptr);

      // Load back next to allocations in the to-be-outlined region.
      Builder.restoreIP(InnerAllocaIP);
      Inner = Builder.CreateLoad(Ptr);
    }

    Value *ReplacementValue = nullptr;
    CallInst *CI = dyn_cast<CallInst>(&V);
    if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
      // The global thread id is replaced by the local tid loaded in the entry.
      ReplacementValue = PrivTID;
    } else {
      Builder.restoreIP(
          PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
      assert(ReplacementValue &&
             "Expected copy/create callback to set replacement value!");
      if (ReplacementValue == &V)
        return;
    }

    for (Use *UPtr : Uses)
      UPtr->set(ReplacementValue);
  };

  // Reset the inner alloca insertion as it will be used for loading the values
  // wrapped into pointers before passing them into the to-be-outlined region.
  // Configure it to insert immediately after the fake use of zero address so
  // that they are available in the generated body and so that the
  // OpenMP-related values (thread ID and zero address pointers) remain leading
  // in the argument list.
  InnerAllocaIP = IRBuilder<>::InsertPoint(
      ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());

  // Reset the outer alloca insertion point to the entry of the relevant block
  // in case it was invalidated.
  OuterAllocaIP = IRBuilder<>::InsertPoint(
      OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());

  for (Value *Input : Inputs) {
    LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
    PrivHelper(*Input);
  }
  LLVM_DEBUG({
    for (Value *Output : Outputs)
      LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
  });
  assert(Outputs.empty() &&
         "OpenMP outlining should not produce live-out values!");

  LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
  LLVM_DEBUG({
    for (auto *BB : Blocks)
      dbgs() << " PBR: " << BB->getName() << "\n";
  });

  // Register the outlined info.
  addOutlineInfo(std::move(OI));

  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
  UI->eraseFromParent();

  return AfterIP;
}

void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) {
  // Build call void __kmpc_flush(ident_t *loc)
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Args[] = {getOrCreateIdent(SrcLocStr)};

  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
}

void OpenMPIRBuilder::createFlush(const LocationDescription &Loc) {
  if (!updateToLocation(Loc))
    return;
  emitFlush(Loc);
}

void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) {
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *Args[] = {Ident, getOrCreateThreadID(Ident)};

  // Ignore return result until untied tasks are supported.
  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
                     Args);
}

void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc) {
  if (!updateToLocation(Loc))
    return;
  emitTaskwaitImpl(Loc);
}

void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) {
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Constant *I32Null = ConstantInt::getNullValue(Int32);
  Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};

  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
                     Args);
}

void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) {
  if (!updateToLocation(Loc))
    return;
  emitTaskyieldImpl(Loc);
}

/// Emit a master region: body runs only for the thread for which
/// __kmpc_master returns true; __kmpc_end_master closes the region.
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB) {

  if (!updateToLocation(Loc))
    return Loc.IP;

  Directive OMPD = Directive::OMPD_master;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {Ident, ThreadId};

  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);

  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ true, /*hasFinalize*/ true);
}

/// Create the block structure and induction variable of a canonical loop with
/// the given \p TripCount, without connecting it to the surrounding CFG:
/// preheader -> header -> cond -> body -> inc -> (header | exit -> after).
CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
    DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
    BasicBlock *PostInsertBefore, const Twine &Name) {
  Module *M = F->getParent();
  LLVMContext &Ctx = M->getContext();
  Type *IndVarTy = TripCount->getType();

  // Create the basic block structure.
  BasicBlock *Preheader =
      BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore);
  BasicBlock *Header =
      BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore);
  BasicBlock *Cond =
      BasicBlock::Create(Ctx, "omp_" + Name + ".cond", F, PreInsertBefore);
  BasicBlock *Body =
      BasicBlock::Create(Ctx, "omp_" + Name + ".body", F, PreInsertBefore);
  BasicBlock *Latch =
      BasicBlock::Create(Ctx, "omp_" + Name + ".inc", F, PostInsertBefore);
  BasicBlock *Exit =
      BasicBlock::Create(Ctx, "omp_" + Name + ".exit", F, PostInsertBefore);
  BasicBlock *After =
      BasicBlock::Create(Ctx, "omp_" + Name + ".after", F, PostInsertBefore);

  // Use specified DebugLoc for new instructions.
  Builder.SetCurrentDebugLocation(DL);

  Builder.SetInsertPoint(Preheader);
  Builder.CreateBr(Header);

  // Induction variable: starts at 0, incremented in the latch.
  Builder.SetInsertPoint(Header);
  PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv");
  IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
  Builder.CreateBr(Cond);

  // Loop while iv u< TripCount (canonical loops use unsigned comparison).
  Builder.SetInsertPoint(Cond);
  Value *Cmp =
      Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp");
  Builder.CreateCondBr(Cmp, Body, Exit);

  Builder.SetInsertPoint(Body);
  Builder.CreateBr(Latch);

  Builder.SetInsertPoint(Latch);
  Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
                                  "omp_" + Name + ".next", /*HasNUW=*/true);
  Builder.CreateBr(Header);
  IndVarPHI->addIncoming(Next, Latch);

  Builder.SetInsertPoint(Exit);
  Builder.CreateBr(After);

  // Remember and return the canonical control flow.
  LoopInfos.emplace_front();
  CanonicalLoopInfo *CL = &LoopInfos.front();

  CL->Preheader = Preheader;
  CL->Header = Header;
  CL->Cond = Cond;
  CL->Body = Body;
  CL->Latch = Latch;
  CL->Exit = Exit;
  CL->After = After;

  CL->IsValid = true;

#ifndef NDEBUG
  CL->assertOK();
#endif
  return CL;
}

CanonicalLoopInfo *
OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
                                     LoopBodyGenCallbackTy BodyGenCB,
                                     Value *TripCount, const Twine &Name) {
  BasicBlock *BB = Loc.IP.getBlock();
  BasicBlock *NextBB = BB->getNextNode();

  CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(),
                                             NextBB, NextBB, Name);
  BasicBlock *After = CL->getAfter();

  // If location is not set, don't connect the loop.
  if (updateToLocation(Loc)) {
    // Split the loop at the insertion point: Branch to the preheader and move
    // every following instruction to after the loop (the After BB). Also, the
    // new successor is the loop's after block.
    Builder.CreateBr(CL->Preheader);
    After->getInstList().splice(After->begin(), BB->getInstList(),
                                Builder.GetInsertPoint(), BB->end());
    After->replaceSuccessorsPhiUsesWith(BB, After);
  }

  // Emit the body content. We do it after connecting the loop to the CFG to
  // avoid that the callback encounters degenerate BBs.
986 BodyGenCB(CL->getBodyIP(), CL->getIndVar()); 987 988 #ifndef NDEBUG 989 CL->assertOK(); 990 #endif 991 return CL; 992 } 993 994 CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop( 995 const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, 996 Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, 997 InsertPointTy ComputeIP, const Twine &Name) { 998 999 // Consider the following difficulties (assuming 8-bit signed integers): 1000 // * Adding \p Step to the loop counter which passes \p Stop may overflow: 1001 // DO I = 1, 100, 50 1002 /// * A \p Step of INT_MIN cannot not be normalized to a positive direction: 1003 // DO I = 100, 0, -128 1004 1005 // Start, Stop and Step must be of the same integer type. 1006 auto *IndVarTy = cast<IntegerType>(Start->getType()); 1007 assert(IndVarTy == Stop->getType() && "Stop type mismatch"); 1008 assert(IndVarTy == Step->getType() && "Step type mismatch"); 1009 1010 LocationDescription ComputeLoc = 1011 ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc; 1012 updateToLocation(ComputeLoc); 1013 1014 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0); 1015 ConstantInt *One = ConstantInt::get(IndVarTy, 1); 1016 1017 // Like Step, but always positive. 1018 Value *Incr = Step; 1019 1020 // Distance between Start and Stop; always positive. 1021 Value *Span; 1022 1023 // Condition whether there are no iterations are executed at all, e.g. because 1024 // UB < LB. 1025 Value *ZeroCmp; 1026 1027 if (IsSigned) { 1028 // Ensure that increment is positive. If not, negate and invert LB and UB. 1029 Value *IsNeg = Builder.CreateICmpSLT(Step, Zero); 1030 Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step); 1031 Value *LB = Builder.CreateSelect(IsNeg, Stop, Start); 1032 Value *UB = Builder.CreateSelect(IsNeg, Start, Stop); 1033 Span = Builder.CreateSub(UB, LB, "", false, true); 1034 ZeroCmp = Builder.CreateICmp( 1035 InclusiveStop ? 
CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB); 1036 } else { 1037 Span = Builder.CreateSub(Stop, Start, "", true); 1038 ZeroCmp = Builder.CreateICmp( 1039 InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start); 1040 } 1041 1042 Value *CountIfLooping; 1043 if (InclusiveStop) { 1044 CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One); 1045 } else { 1046 // Avoid incrementing past stop since it could overflow. 1047 Value *CountIfTwo = Builder.CreateAdd( 1048 Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One); 1049 Value *OneCmp = Builder.CreateICmp( 1050 InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr); 1051 CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo); 1052 } 1053 Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping, 1054 "omp_" + Name + ".tripcount"); 1055 1056 auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) { 1057 Builder.restoreIP(CodeGenIP); 1058 Value *Span = Builder.CreateMul(IV, Step); 1059 Value *IndVar = Builder.CreateAdd(Span, Start); 1060 BodyGenCB(Builder.saveIP(), IndVar); 1061 }; 1062 LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP(); 1063 return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name); 1064 } 1065 1066 // Returns an LLVM function to call for initializing loop bounds using OpenMP 1067 // static scheduling depending on `type`. Only i32 and i64 are supported by the 1068 // runtime. Always interpret integers as unsigned similarly to 1069 // CanonicalLoopInfo. 
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M,
                                                  OpenMPIRBuilder &OMPBuilder) {
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  if (Bitwidth == 32)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
  if (Bitwidth == 64)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}

// Sets the number of loop iterations to the given value. This value must be
// valid in the condition block (i.e., defined in the preheader) and is
// interpreted as an unsigned integer.
void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount) {
  // The first instruction of a canonical loop's cond block is the comparison
  // of the induction variable against the trip count; patch its RHS in place.
  Instruction *CmpI = &CLI->getCond()->front();
  assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
  CmpI->setOperand(1, TripCount);
  CLI->assertOK();
}

CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
    const LocationDescription &Loc, CanonicalLoopInfo *CLI,
    InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk) {
  // Set up the source location value for OpenMP runtime.
  if (!updateToLocation(Loc))
    return nullptr;

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *SrcLoc = getOrCreateIdent(SrcLocStr);

  // Declare useful OpenMP runtime functions.
  Value *IV = CLI->getIndVar();
  Type *IVTy = IV->getType();
  FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
  FunctionCallee StaticFini =
      getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);

  // Allocate space for computed loop bounds as expected by the "init" function.
  Builder.restoreIP(AllocaIP);
  Type *I32Type = Type::getInt32Ty(M.getContext());
  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");

  // At the end of the preheader, prepare for calling the "init" function by
  // storing the current loop bounds into the allocated space. A canonical loop
  // always iterates from 0 to trip-count with step 1. Note that "init" expects
  // and produces an inclusive upper bound.
  Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
  Constant *Zero = ConstantInt::get(IVTy, 0);
  Constant *One = ConstantInt::get(IVTy, 1);
  Builder.CreateStore(Zero, PLowerBound);
  Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
  Builder.CreateStore(UpperBound, PUpperBound);
  Builder.CreateStore(One, PStride);

  // Default to a chunk size of 1 if none was provided.
  if (!Chunk)
    Chunk = One;

  Value *ThreadNum = getOrCreateThreadID(SrcLoc);

  // TODO: extract scheduling type and map it to OMP constant. This is currently
  // happening in kmp.h and its ilk and needs to be moved to OpenMP.td first.
  // NOTE(review): 34 presumably matches the runtime's static schedule value in
  // kmp.h's sched_type enum — confirm when the TODO above is addressed.
  constexpr int StaticSchedType = 34;
  Constant *SchedulingType = ConstantInt::get(I32Type, StaticSchedType);

  // Call the "init" function and update the trip count of the loop with the
  // value it produced.
  Builder.CreateCall(StaticInit,
                     {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
                      PUpperBound, PStride, One, Chunk});
  Value *LowerBound = Builder.CreateLoad(PLowerBound);
  Value *InclusiveUpperBound = Builder.CreateLoad(PUpperBound);
  Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
  Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
  setCanonicalLoopTripCount(CLI, TripCount);

  // Update all uses of the induction variable except the one in the condition
  // block that compares it with the actual upper bound, and the increment in
  // the latch block.
  // TODO: this can eventually move to CanonicalLoopInfo or to a new
  // CanonicalLoopInfoUpdater interface.
  Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt());
  Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound);
  IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) {
    auto *Instr = dyn_cast<Instruction>(U.getUser());
    return !Instr ||
           (Instr->getParent() != CLI->getCond() &&
            Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV);
  });

  // In the "exit" block, call the "fini" function.
  Builder.SetInsertPoint(CLI->getExit(),
                         CLI->getExit()->getTerminator()->getIterator());
  Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});

  // Add the barrier if requested.
  if (NeedsBarrier)
    createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
                  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
                  /* CheckCancelFlag */ false);

  CLI->assertOK();
  return CLI;
}

CanonicalLoopInfo *OpenMPIRBuilder::createWorkshareLoop(
    const LocationDescription &Loc, CanonicalLoopInfo *CLI,
    InsertPointTy AllocaIP, bool NeedsBarrier) {
  // Currently only supports static schedules.
  return createStaticWorkshareLoop(Loc, CLI, AllocaIP, NeedsBarrier);
}

/// Make \p Source branch to \p Target.
///
/// Handles two situations:
/// * \p Source already has an unconditional branch.
/// * \p Source is a degenerate block (no terminator because the BB is
///       the current head of the IR construction).
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) {
  if (Instruction *Term = Source->getTerminator()) {
    auto *Br = cast<BranchInst>(Term);
    assert(!Br->isConditional() &&
           "BB's terminator must be an unconditional branch (or degenerate)");
    // Keep PHIs in the old successor consistent before retargeting the edge.
    BasicBlock *Succ = Br->getSuccessor(0);
    Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
    Br->setSuccessor(0, Target);
    return;
  }

  auto *NewBr = BranchInst::Create(Target, Source);
  NewBr->setDebugLoc(DL);
}

/// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
/// after this \p OldTarget will be orphaned.
static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
                                      BasicBlock *NewTarget, DebugLoc DL) {
  for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget)))
    redirectTo(Pred, NewTarget, DL);
}

/// Determine which blocks in \p BBs are reachable from outside and remove the
/// ones that are not reachable from the function.
static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) {
  SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()};
  // A candidate has remaining uses if any instruction outside the
  // to-be-erased set references it (non-instruction users are ignored).
  auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
    for (Use &U : BB->uses()) {
      auto *UseInst = dyn_cast<Instruction>(U.getUser());
      if (!UseInst)
        continue;
      if (BBsToErase.count(UseInst->getParent()))
        continue;
      return true;
    }
    return false;
  };

  // Iterate to a fixed point: once a block is kept alive, its uses of other
  // candidates count as outside uses and may keep those alive as well.
  while (true) {
    bool Changed = false;
    for (BasicBlock *BB : make_early_inc_range(BBsToErase)) {
      if (HasRemainingUses(BB)) {
        BBsToErase.erase(BB);
        Changed = true;
      }
    }
    if (!Changed)
      break;
  }

  SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end());
  DeleteDeadBlocks(BBVec);
}

CanonicalLoopInfo *
OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
                               InsertPointTy ComputeIP) {
  assert(Loops.size() >= 1 && "At least one loop required");
  size_t NumLoops = Loops.size();

  // Nothing to do if there is already just one loop.
  if (NumLoops == 1)
    return Loops.front();

  CanonicalLoopInfo *Outermost = Loops.front();
  CanonicalLoopInfo *Innermost = Loops.back();
  BasicBlock *OrigPreheader = Outermost->getPreheader();
  BasicBlock *OrigAfter = Outermost->getAfter();
  Function *F = OrigPreheader->getParent();

  // Setup the IRBuilder for inserting the trip count computation.
  Builder.SetCurrentDebugLocation(DL);
  if (ComputeIP.isSet())
    Builder.restoreIP(ComputeIP);
  else
    Builder.restoreIP(Outermost->getPreheaderIP());

  // Derive the collapsed loop's trip count as the product of the input loops'
  // trip counts.
  // TODO: Find common/largest indvar type.
  Value *CollapsedTripCount = nullptr;
  for (CanonicalLoopInfo *L : Loops) {
    Value *OrigTripCount = L->getTripCount();
    if (!CollapsedTripCount) {
      CollapsedTripCount = OrigTripCount;
      continue;
    }

    // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
    CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount,
                                           {}, /*HasNUW=*/true);
  }

  // Create the collapsed loop control flow.
  CanonicalLoopInfo *Result =
      createLoopSkeleton(DL, CollapsedTripCount, F,
                         OrigPreheader->getNextNode(), OrigAfter, "collapsed");

  // Build the collapsed loop body code.
  // Start with deriving the input loop induction variables from the collapsed
  // one, using a divmod scheme. To preserve the original loops' order, the
  // innermost loop uses the least significant bits.
  Builder.restoreIP(Result->getBodyIP());

  Value *Leftover = Result->getIndVar();
  SmallVector<Value *> NewIndVars;
  NewIndVars.set_size(NumLoops);
  for (int i = NumLoops - 1; i >= 1; --i) {
    Value *OrigTripCount = Loops[i]->getTripCount();

    Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
    NewIndVars[i] = NewIndVar;

    Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
  }
  // Outermost loop gets all the remaining bits.
  NewIndVars[0] = Leftover;

  // Construct the loop body control flow.
  // We progressively construct the branch structure following in direction of
  // the control flow, from the leading in-between code, the loop nest body, the
  // trailing in-between code, and rejoining the collapsed loop's latch.
  // ContinueBlock and ContinuePred keep track of the source(s) of next edge. If
  // the ContinueBlock is set, continue with that block. If ContinuePred, use
  // its predecessors as sources.
  BasicBlock *ContinueBlock = Result->getBody();
  BasicBlock *ContinuePred = nullptr;
  auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
                                                          BasicBlock *NextSrc) {
    if (ContinueBlock)
      redirectTo(ContinueBlock, Dest, DL);
    else
      redirectAllPredecessorsTo(ContinuePred, Dest, DL);

    ContinueBlock = nullptr;
    ContinuePred = NextSrc;
  };

  // The code before the nested loop of each level.
  // Because we are sinking it into the nest, it will be executed more often
  // than the original loop. More sophisticated schemes could keep track of what
  // the in-between code is and instantiate it only once per thread.
  for (size_t i = 0; i < NumLoops - 1; ++i)
    ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());

  // Connect the loop nest body.
  ContinueWith(Innermost->getBody(), Innermost->getLatch());

  // The code after the nested loop at each level.
  for (size_t i = NumLoops - 1; i > 0; --i)
    ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());

  // Connect the finished loop to the collapsed loop latch.
  ContinueWith(Result->getLatch(), nullptr);

  // Replace the input loops with the new collapsed loop.
  redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL);
  redirectTo(Result->getAfter(), Outermost->getAfter(), DL);

  // Replace the input loop indvars with the derived ones.
  for (size_t i = 0; i < NumLoops; ++i)
    Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);

  // Remove unused parts of the input loops.
  SmallVector<BasicBlock *, 12> OldControlBBs;
  OldControlBBs.reserve(6 * Loops.size());
  for (CanonicalLoopInfo *Loop : Loops)
    Loop->collectControlBlocks(OldControlBBs);
  removeUnusedBlocksFromParent(OldControlBBs);

#ifndef NDEBUG
  Result->assertOK();
#endif
  return Result;
}

std::vector<CanonicalLoopInfo *>
OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
                           ArrayRef<Value *> TileSizes) {
  assert(TileSizes.size() == Loops.size() &&
         "Must pass as many tile sizes as there are loops");
  int NumLoops = Loops.size();
  assert(NumLoops >= 1 && "At least one loop to tile required");

  CanonicalLoopInfo *OutermostLoop = Loops.front();
  CanonicalLoopInfo *InnermostLoop = Loops.back();
  Function *F = OutermostLoop->getBody()->getParent();
  BasicBlock *InnerEnter = InnermostLoop->getBody();
  BasicBlock *InnerLatch = InnermostLoop->getLatch();

  // Collect original trip counts and induction variable to be accessible by
  // index. Also, the structure of the original loops is not preserved during
  // the construction of the tiled loops, so do it before we scavenge the BBs of
  // any original CanonicalLoopInfo.
  SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
  for (CanonicalLoopInfo *L : Loops) {
    OrigTripCounts.push_back(L->getTripCount());
    OrigIndVars.push_back(L->getIndVar());
  }

  // Collect the code between loop headers. These may contain SSA definitions
  // that are used in the loop nest body. To be usable within the innermost
  // body, these BasicBlocks will be sunk into the loop nest body. That is,
  // these instructions may be executed more often than before the tiling.
  // TODO: It would be sufficient to only sink them into body of the
  // corresponding tile loop.
  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode;
  for (int i = 0; i < NumLoops - 1; ++i) {
    CanonicalLoopInfo *Surrounding = Loops[i];
    CanonicalLoopInfo *Nested = Loops[i + 1];

    BasicBlock *EnterBB = Surrounding->getBody();
    BasicBlock *ExitBB = Nested->getHeader();
    InbetweenCode.emplace_back(EnterBB, ExitBB);
  }

  // Compute the trip counts of the floor loops.
  Builder.SetCurrentDebugLocation(DL);
  Builder.restoreIP(OutermostLoop->getPreheaderIP());
  SmallVector<Value *, 4> FloorCount, FloorRems;
  for (int i = 0; i < NumLoops; ++i) {
    Value *TileSize = TileSizes[i];
    Value *OrigTripCount = OrigTripCounts[i];
    Type *IVType = OrigTripCount->getType();

    Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
    Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);

    // 0 if the tilesize divides the tripcount, 1 otherwise.
    // 1 means we need an additional iteration for a partial tile.
    //
    // Unfortunately we cannot just use the roundup-formula
    //   (tripcount + tilesize - 1)/tilesize
    // because the summation might overflow. We do not want to introduce
    // undefined behavior when the untiled loop nest did not.
    Value *FloorTripOverflow =
        Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));

    FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
    FloorTripCount =
        Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
                          "omp_floor" + Twine(i) + ".tripcount", true);

    // Remember some values for later use. Note that FloorCount stores the
    // rounded-up trip count (including the partial-tile iteration).
    FloorCount.push_back(FloorTripCount);
    FloorRems.push_back(FloorTripRem);
  }

  // Generate the new loop nest, from the outermost to the innermost.
  std::vector<CanonicalLoopInfo *> Result;
  Result.reserve(NumLoops * 2);

  // The basic block of the surrounding loop that enters the nest generated
  // loop.
  BasicBlock *Enter = OutermostLoop->getPreheader();

  // The basic block of the surrounding loop where the inner code should
  // continue.
  BasicBlock *Continue = OutermostLoop->getAfter();

  // Where the next loop basic block should be inserted.
  BasicBlock *OutroInsertBefore = InnermostLoop->getExit();

  auto EmbeddNewLoop =
      [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
          Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
    CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
        DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
    redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
    redirectTo(EmbeddedLoop->getAfter(), Continue, DL);

    // Setup the position where the next embedded loop connects to this loop.
    Enter = EmbeddedLoop->getBody();
    Continue = EmbeddedLoop->getLatch();
    OutroInsertBefore = EmbeddedLoop->getLatch();
    return EmbeddedLoop;
  };

  auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
                                                  const Twine &NameBase) {
    for (auto P : enumerate(TripCounts)) {
      CanonicalLoopInfo *EmbeddedLoop =
          EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
      Result.push_back(EmbeddedLoop);
    }
  };

  EmbeddNewLoops(FloorCount, "floor");

  // Within the innermost floor loop, emit the code that computes the tile
  // sizes.
  Builder.SetInsertPoint(Enter->getTerminator());
  SmallVector<Value *, 4> TileCounts;
  for (int i = 0; i < NumLoops; ++i) {
    CanonicalLoopInfo *FloorLoop = Result[i];
    Value *TileSize = TileSizes[i];

    // NOTE(review): the floor loop's IV ranges over [0, FloorCount[i]), so
    // equality with FloorCount[i] (the rounded-up count) appears impossible
    // and the partial tile would get a full TileSize trip count. Presumably
    // the comparison should be against the pre-roundup quotient (or
    // IV + 1 == FloorCount[i] guarded by FloorRems[i] != 0) — verify.
    Value *FloorIsEpilogue =
        Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
    Value *TileTripCount =
        Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);

    TileCounts.push_back(TileTripCount);
  }

  // Create the tile loops.
  EmbeddNewLoops(TileCounts, "tile");

  // Insert the inbetween code into the body.
  BasicBlock *BodyEnter = Enter;
  BasicBlock *BodyEntered = nullptr;
  for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
    BasicBlock *EnterBB = P.first;
    BasicBlock *ExitBB = P.second;

    if (BodyEnter)
      redirectTo(BodyEnter, EnterBB, DL);
    else
      redirectAllPredecessorsTo(BodyEntered, EnterBB, DL);

    BodyEnter = nullptr;
    BodyEntered = ExitBB;
  }

  // Append the original loop nest body into the generated loop nest body.
  if (BodyEnter)
    redirectTo(BodyEnter, InnerEnter, DL);
  else
    redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL);
  redirectAllPredecessorsTo(InnerLatch, Continue, DL);

  // Replace the original induction variable with an induction variable computed
  // from the tile and floor induction variables.
  Builder.restoreIP(Result.back()->getBodyIP());
  for (int i = 0; i < NumLoops; ++i) {
    CanonicalLoopInfo *FloorLoop = Result[i];
    CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
    Value *OrigIndVar = OrigIndVars[i];
    Value *Size = TileSizes[i];

    // OrigIndVar = FloorIV * TileSize + TileIV.
    Value *Scale =
        Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
    Value *Shift =
        Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
    OrigIndVar->replaceAllUsesWith(Shift);
  }

  // Remove unused parts of the original loops.
  SmallVector<BasicBlock *, 12> OldControlBBs;
  OldControlBBs.reserve(6 * Loops.size());
  for (CanonicalLoopInfo *Loop : Loops)
    Loop->collectControlBlocks(OldControlBBs);
  removeUnusedBlocksFromParent(OldControlBBs);

#ifndef NDEBUG
  for (CanonicalLoopInfo *GenL : Result)
    GenL->assertOK();
#endif
  return Result;
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
                                   llvm::Value *BufSize, llvm::Value *CpyBuf,
                                   llvm::Value *CpyFn, llvm::Value *DidIt) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);

  // The runtime call takes the flag by value; load it from the given slot.
  llvm::Value *DidItLD = Builder.CreateLoad(DidIt);

  Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};

  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
  Builder.CreateCall(Fn, Args);

  return Builder.saveIP();
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createSingle(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB, llvm::Value *DidIt) {

  if (!updateToLocation(Loc))
    return Loc.IP;

  // If needed (i.e. not null), initialize `DidIt` with 0
  if (DidIt) {
    Builder.CreateStore(Builder.getInt32(0), DidIt);
  }

  Directive OMPD = Directive::OMPD_single;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {Ident, ThreadId};

  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);

  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);

  // generates the following:
  // if (__kmpc_single()) {
  //   .... single region ...
  //   __kmpc_end_single
  // }

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ true, /*hasFinalize*/ true);
}

OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical(
    const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
    FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {

  if (!updateToLocation(Loc))
    return Loc.IP;

  Directive OMPD = Directive::OMPD_critical;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *LockVar = getOMPCriticalRegionLock(CriticalName);
  Value *Args[] = {Ident, ThreadId, LockVar};

  SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), std::end(Args));
  Function *RTFn = nullptr;
  if (HintInst) {
    // Add Hint to entry Args and create call
    EnterArgs.push_back(HintInst);
    RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
  } else {
    RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
  }
  Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);

  // The exit call never takes the hint; it uses the plain argument list.
  Function *ExitRTLFn =
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ false, /*hasFinalize*/ true);
}

OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
    Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
    BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
    bool HasFinalize) {

  if (HasFinalize)
    FinalizationStack.push_back({FiniCB, OMPD, /*IsCancellable*/ false});

  // Create inlined region's entry and body blocks, in preparation
  // for conditional creation
  BasicBlock *EntryBB = Builder.GetInsertBlock();
  Instruction *SplitPos = EntryBB->getTerminator();
  // Insert a placeholder terminator so splitBasicBlock has a split point even
  // when EntryBB is the degenerate head of the IR under construction.
  if (!isa_and_nonnull<BranchInst>(SplitPos))
    SplitPos = new UnreachableInst(Builder.getContext(), EntryBB);
  BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end");
  BasicBlock *FiniBB =
      EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize");

  Builder.SetInsertPoint(EntryBB->getTerminator());
  emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);

  // generate body
  BodyGenCB(/* AllocaIP */ InsertPointTy(),
            /* CodeGenIP */ Builder.saveIP(), *FiniBB);

  // If we didn't emit a branch to FiniBB during body generation, it means
  // FiniBB is unreachable (e.g. while(1);). stop generating all the
  // unreachable blocks, and remove anything we are not going to use.
  auto SkipEmittingRegion = FiniBB->hasNPredecessors(0);
  if (SkipEmittingRegion) {
    FiniBB->eraseFromParent();
    ExitCall->eraseFromParent();
    // Discard finalization if we have it.
    if (HasFinalize) {
      assert(!FinalizationStack.empty() &&
             "Unexpected finalization stack state!");
      FinalizationStack.pop_back();
    }
  } else {
    // emit exit call and do any needed finalization.
    auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
    assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
           FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
           "Unexpected control flow graph state!!");
    emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
    assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
           "Unexpected Control Flow State!");
    MergeBlockIntoPredecessor(FiniBB);
  }

  // If we are skipping the region of a non conditional, remove the exit
  // block, and clear the builder's insertion point.
  assert(SplitPos->getParent() == ExitBB &&
         "Unexpected Insertion point location!");
  if (!Conditional && SkipEmittingRegion) {
    ExitBB->eraseFromParent();
    Builder.ClearInsertionPoint();
  } else {
    auto merged = MergeBlockIntoPredecessor(ExitBB);
    BasicBlock *ExitPredBB = SplitPos->getParent();
    auto InsertBB = merged ? ExitPredBB : ExitBB;
    // Remove the placeholder terminator added above, if we created one.
    if (!isa_and_nonnull<BranchInst>(SplitPos))
      SplitPos->eraseFromParent();
    Builder.SetInsertPoint(InsertBB);
  }

  return Builder.saveIP();
}

OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
    Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {

  // If there is nothing to do, return the current insertion point.
  if (!Conditional)
    return Builder.saveIP();

  BasicBlock *EntryBB = Builder.GetInsertBlock();
  Value *CallBool = Builder.CreateIsNotNull(EntryCall);
  auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body");
  auto *UI = new UnreachableInst(Builder.getContext(), ThenBB);

  // Emit thenBB and set the Builder's insertion point there for
  // body generation next. Place the block after the current block.
  Function *CurFn = EntryBB->getParent();
  CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB);

  // Move Entry branch to end of ThenBB, and replace with conditional
  // branch (If-stmt)
  Instruction *EntryBBTI = EntryBB->getTerminator();
  Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
  EntryBBTI->removeFromParent();
  Builder.SetInsertPoint(UI);
  Builder.Insert(EntryBBTI);
  UI->eraseFromParent();
  Builder.SetInsertPoint(ThenBB->getTerminator());

  // return an insertion point to ExitBB.
  return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt());
}

OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
    omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
    bool HasFinalize) {

  Builder.restoreIP(FinIP);

  // If there is finalization to do, emit it before the exit call
  if (HasFinalize) {
    assert(!FinalizationStack.empty() &&
           "Unexpected finalization stack state!");

    FinalizationInfo Fi = FinalizationStack.pop_back_val();
    assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");

    Fi.FiniCB(FinIP);

    BasicBlock *FiniBB = FinIP.getBlock();
    Instruction *FiniBBTI = FiniBB->getTerminator();

    // set Builder IP for call creation
    Builder.SetInsertPoint(FiniBBTI);
  }

  // place the Exitcall as last instruction before Finalization block terminator
  ExitCall->removeFromParent();
  Builder.Insert(ExitCall);

  return IRBuilder<>::InsertPoint(ExitCall->getParent(),
                                  ExitCall->getIterator());
}

OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
    InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr,
    llvm::IntegerType *IntPtrTy, bool BranchtoEnd) {
  if (!IP.isSet())
    return IP;

  IRBuilder<>::InsertPointGuard IPG(Builder);

  // creates the following CFG structure
  //	   OMP_Entry : (MasterAddr != PrivateAddr)?
  //       F     T
  //       |      \
  //       |     copyin.not.master
  //       |      /
  //       v     /
  //   copyin.not.master.end
  //		     |
  //         v
  //   OMP.Entry.Next

  BasicBlock *OMP_Entry = IP.getBlock();
  Function *CurFn = OMP_Entry->getParent();
  BasicBlock *CopyBegin =
      BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn);
  BasicBlock *CopyEnd = nullptr;

  // If entry block is terminated, split to preserve the branch to following
  // basic block (i.e.
OMP.Entry.Next), otherwise, leave everything as is. 1801 if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) { 1802 CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(), 1803 "copyin.not.master.end"); 1804 OMP_Entry->getTerminator()->eraseFromParent(); 1805 } else { 1806 CopyEnd = 1807 BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn); 1808 } 1809 1810 Builder.SetInsertPoint(OMP_Entry); 1811 Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy); 1812 Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy); 1813 Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr); 1814 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd); 1815 1816 Builder.SetInsertPoint(CopyBegin); 1817 if (BranchtoEnd) 1818 Builder.SetInsertPoint(Builder.CreateBr(CopyEnd)); 1819 1820 return Builder.saveIP(); 1821 } 1822 1823 CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc, 1824 Value *Size, Value *Allocator, 1825 std::string Name) { 1826 IRBuilder<>::InsertPointGuard IPG(Builder); 1827 Builder.restoreIP(Loc.IP); 1828 1829 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 1830 Value *Ident = getOrCreateIdent(SrcLocStr); 1831 Value *ThreadId = getOrCreateThreadID(Ident); 1832 Value *Args[] = {ThreadId, Size, Allocator}; 1833 1834 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc); 1835 1836 return Builder.CreateCall(Fn, Args, Name); 1837 } 1838 1839 CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc, 1840 Value *Addr, Value *Allocator, 1841 std::string Name) { 1842 IRBuilder<>::InsertPointGuard IPG(Builder); 1843 Builder.restoreIP(Loc.IP); 1844 1845 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 1846 Value *Ident = getOrCreateIdent(SrcLocStr); 1847 Value *ThreadId = getOrCreateThreadID(Ident); 1848 Value *Args[] = {ThreadId, Addr, Allocator}; 1849 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free); 1850 return Builder.CreateCall(Fn, Args, Name); 1851 } 1852 1853 CallInst 
*OpenMPIRBuilder::createCachedThreadPrivate( 1854 const LocationDescription &Loc, llvm::Value *Pointer, 1855 llvm::ConstantInt *Size, const llvm::Twine &Name) { 1856 IRBuilder<>::InsertPointGuard IPG(Builder); 1857 Builder.restoreIP(Loc.IP); 1858 1859 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 1860 Value *Ident = getOrCreateIdent(SrcLocStr); 1861 Value *ThreadId = getOrCreateThreadID(Ident); 1862 Constant *ThreadPrivateCache = 1863 getOrCreateOMPInternalVariable(Int8PtrPtr, Name); 1864 llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache}; 1865 1866 Function *Fn = 1867 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached); 1868 1869 return Builder.CreateCall(Fn, Args); 1870 } 1871 1872 std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts, 1873 StringRef FirstSeparator, 1874 StringRef Separator) { 1875 SmallString<128> Buffer; 1876 llvm::raw_svector_ostream OS(Buffer); 1877 StringRef Sep = FirstSeparator; 1878 for (StringRef Part : Parts) { 1879 OS << Sep << Part; 1880 Sep = Separator; 1881 } 1882 return OS.str().str(); 1883 } 1884 1885 Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable( 1886 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 1887 // TODO: Replace the twine arg with stringref to get rid of the conversion 1888 // logic. However This is taken from current implementation in clang as is. 1889 // Since this method is used in many places exclusively for OMP internal use 1890 // we will keep it as is for temporarily until we move all users to the 1891 // builder and then, if possible, fix it everywhere in one go. 
1892 SmallString<256> Buffer; 1893 llvm::raw_svector_ostream Out(Buffer); 1894 Out << Name; 1895 StringRef RuntimeName = Out.str(); 1896 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 1897 if (Elem.second) { 1898 assert(Elem.second->getType()->getPointerElementType() == Ty && 1899 "OMP internal variable has different type than requested"); 1900 } else { 1901 // TODO: investigate the appropriate linkage type used for the global 1902 // variable for possibly changing that to internal or private, or maybe 1903 // create different versions of the function for different OMP internal 1904 // variables. 1905 Elem.second = new llvm::GlobalVariable( 1906 M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage, 1907 llvm::Constant::getNullValue(Ty), Elem.first(), 1908 /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, 1909 AddressSpace); 1910 } 1911 1912 return Elem.second; 1913 } 1914 1915 Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) { 1916 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 1917 std::string Name = getNameWithSeparators({Prefix, "var"}, ".", "."); 1918 return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name); 1919 } 1920 1921 // Create all simple and struct types exposed by the runtime and remember 1922 // the llvm::PointerTypes of them for easy access later. 1923 void OpenMPIRBuilder::initializeTypes(Module &M) { 1924 LLVMContext &Ctx = M.getContext(); 1925 StructType *T; 1926 #define OMP_TYPE(VarName, InitValue) VarName = InitValue; 1927 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \ 1928 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \ 1929 VarName##PtrTy = PointerType::getUnqual(VarName##Ty); 1930 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \ 1931 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \ 1932 VarName##Ptr = PointerType::getUnqual(VarName); 1933 #define OMP_STRUCT_TYPE(VarName, StructName, ...) 
\ 1934 T = StructType::getTypeByName(Ctx, StructName); \ 1935 if (!T) \ 1936 T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \ 1937 VarName = T; \ 1938 VarName##Ptr = PointerType::getUnqual(T); 1939 #include "llvm/Frontend/OpenMP/OMPKinds.def" 1940 } 1941 1942 void OpenMPIRBuilder::OutlineInfo::collectBlocks( 1943 SmallPtrSetImpl<BasicBlock *> &BlockSet, 1944 SmallVectorImpl<BasicBlock *> &BlockVector) { 1945 SmallVector<BasicBlock *, 32> Worklist; 1946 BlockSet.insert(EntryBB); 1947 BlockSet.insert(ExitBB); 1948 1949 Worklist.push_back(EntryBB); 1950 while (!Worklist.empty()) { 1951 BasicBlock *BB = Worklist.pop_back_val(); 1952 BlockVector.push_back(BB); 1953 for (BasicBlock *SuccBB : successors(BB)) 1954 if (BlockSet.insert(SuccBB).second) 1955 Worklist.push_back(SuccBB); 1956 } 1957 } 1958 1959 void CanonicalLoopInfo::collectControlBlocks( 1960 SmallVectorImpl<BasicBlock *> &BBs) { 1961 // We only count those BBs as control block for which we do not need to 1962 // reverse the CFG, i.e. not the loop body which can contain arbitrary control 1963 // flow. For consistency, this also means we do not add the Body block, which 1964 // is just the entry to the body code. 1965 BBs.reserve(BBs.size() + 6); 1966 BBs.append({Preheader, Header, Cond, Latch, Exit, After}); 1967 } 1968 1969 void CanonicalLoopInfo::assertOK() const { 1970 #ifndef NDEBUG 1971 if (!IsValid) 1972 return; 1973 1974 // Verify standard control-flow we use for OpenMP loops. 
1975 assert(Preheader); 1976 assert(isa<BranchInst>(Preheader->getTerminator()) && 1977 "Preheader must terminate with unconditional branch"); 1978 assert(Preheader->getSingleSuccessor() == Header && 1979 "Preheader must jump to header"); 1980 1981 assert(Header); 1982 assert(isa<BranchInst>(Header->getTerminator()) && 1983 "Header must terminate with unconditional branch"); 1984 assert(Header->getSingleSuccessor() == Cond && 1985 "Header must jump to exiting block"); 1986 1987 assert(Cond); 1988 assert(Cond->getSinglePredecessor() == Header && 1989 "Exiting block only reachable from header"); 1990 1991 assert(isa<BranchInst>(Cond->getTerminator()) && 1992 "Exiting block must terminate with conditional branch"); 1993 assert(size(successors(Cond)) == 2 && 1994 "Exiting block must have two successors"); 1995 assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body && 1996 "Exiting block's first successor jump to the body"); 1997 assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit && 1998 "Exiting block's second successor must exit the loop"); 1999 2000 assert(Body); 2001 assert(Body->getSinglePredecessor() == Cond && 2002 "Body only reachable from exiting block"); 2003 assert(!isa<PHINode>(Body->front())); 2004 2005 assert(Latch); 2006 assert(isa<BranchInst>(Latch->getTerminator()) && 2007 "Latch must terminate with unconditional branch"); 2008 assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header"); 2009 // TODO: To support simple redirecting of the end of the body code that has 2010 // multiple; introduce another auxiliary basic block like preheader and after. 
2011 assert(Latch->getSinglePredecessor() != nullptr); 2012 assert(!isa<PHINode>(Latch->front())); 2013 2014 assert(Exit); 2015 assert(isa<BranchInst>(Exit->getTerminator()) && 2016 "Exit block must terminate with unconditional branch"); 2017 assert(Exit->getSingleSuccessor() == After && 2018 "Exit block must jump to after block"); 2019 2020 assert(After); 2021 assert(After->getSinglePredecessor() == Exit && 2022 "After block only reachable from exit block"); 2023 assert(After->empty() || !isa<PHINode>(After->front())); 2024 2025 Instruction *IndVar = getIndVar(); 2026 assert(IndVar && "Canonical induction variable not found?"); 2027 assert(isa<IntegerType>(IndVar->getType()) && 2028 "Induction variable must be an integer"); 2029 assert(cast<PHINode>(IndVar)->getParent() == Header && 2030 "Induction variable must be a PHI in the loop header"); 2031 assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader); 2032 assert( 2033 cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero()); 2034 assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch); 2035 2036 auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1); 2037 assert(cast<Instruction>(NextIndVar)->getParent() == Latch); 2038 assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add); 2039 assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar); 2040 assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1)) 2041 ->isOne()); 2042 2043 Value *TripCount = getTripCount(); 2044 assert(TripCount && "Loop trip count not found?"); 2045 assert(IndVar->getType() == TripCount->getType() && 2046 "Trip count and induction variable must have the same type"); 2047 2048 auto *CmpI = cast<CmpInst>(&Cond->front()); 2049 assert(CmpI->getPredicate() == CmpInst::ICMP_ULT && 2050 "Exit condition must be a signed less-than comparison"); 2051 assert(CmpI->getOperand(0) == IndVar && 2052 "Exit condition must compare the induction variable"); 2053 
assert(CmpI->getOperand(1) == TripCount && 2054 "Exit condition must compare with the trip count"); 2055 #endif 2056 } 2057