1 //===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// 10 /// This file implements the OpenMPIRBuilder class, which is used as a 11 /// convenient way to create LLVM instructions for OpenMP directives. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 16 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/ADT/Triple.h" 19 #include "llvm/IR/CFG.h" 20 #include "llvm/IR/DebugInfo.h" 21 #include "llvm/IR/IRBuilder.h" 22 #include "llvm/IR/MDBuilder.h" 23 #include "llvm/IR/Value.h" 24 #include "llvm/Support/CommandLine.h" 25 #include "llvm/Support/Error.h" 26 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 27 #include "llvm/Transforms/Utils/CodeExtractor.h" 28 29 #include <sstream> 30 31 #define DEBUG_TYPE "openmp-ir-builder" 32 33 using namespace llvm; 34 using namespace omp; 35 36 static cl::opt<bool> 37 OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, 38 cl::desc("Use optimistic attributes describing " 39 "'as-if' properties of runtime calls."), 40 cl::init(false)); 41 42 void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) { 43 LLVMContext &Ctx = Fn.getContext(); 44 45 // Get the function's current attributes. 46 auto Attrs = Fn.getAttributes(); 47 auto FnAttrs = Attrs.getFnAttrs(); 48 auto RetAttrs = Attrs.getRetAttrs(); 49 SmallVector<AttributeSet, 4> ArgAttrs; 50 for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo) 51 ArgAttrs.emplace_back(Attrs.getParamAttrs(ArgNo)); 52 53 #define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet; 54 #include "llvm/Frontend/OpenMP/OMPKinds.def" 55 56 // Add attributes to the function declaration. 57 switch (FnID) { 58 #define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \ 59 case Enum: \ 60 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \ 61 RetAttrs = RetAttrs.addAttributes(Ctx, RetAttrSet); \ 62 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \ 63 ArgAttrs[ArgNo] = \ 64 ArgAttrs[ArgNo].addAttributes(Ctx, ArgAttrSets[ArgNo]); \ 65 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \ 66 break; 67 #include "llvm/Frontend/OpenMP/OMPKinds.def" 68 default: 69 // Attributes are optional. 70 break; 71 } 72 } 73 74 FunctionCallee 75 OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M, RuntimeFunction FnID) { 76 FunctionType *FnTy = nullptr; 77 Function *Fn = nullptr; 78 79 // Try to find the declation in the module first. 80 switch (FnID) { 81 #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \ 82 case Enum: \ 83 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \ 84 IsVarArg); \ 85 Fn = M.getFunction(Str); \ 86 break; 87 #include "llvm/Frontend/OpenMP/OMPKinds.def" 88 } 89 90 if (!Fn) { 91 // Create a new declaration if we need one. 92 switch (FnID) { 93 #define OMP_RTL(Enum, Str, ...) \ 94 case Enum: \ 95 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \ 96 break; 97 #include "llvm/Frontend/OpenMP/OMPKinds.def" 98 } 99 100 // Add information if the runtime function takes a callback function 101 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) { 102 if (!Fn->hasMetadata(LLVMContext::MD_callback)) { 103 LLVMContext &Ctx = Fn->getContext(); 104 MDBuilder MDB(Ctx); 105 // Annotate the callback behavior of the runtime function: 106 // - The callback callee is argument number 2 (microtask). 107 // - The first two arguments of the callback callee are unknown (-1). 108 // - All variadic arguments to the runtime function are passed to the 109 // callback callee. 110 Fn->addMetadata( 111 LLVMContext::MD_callback, 112 *MDNode::get(Ctx, {MDB.createCallbackEncoding( 113 2, {-1, -1}, /* VarArgsArePassed */ true)})); 114 } 115 } 116 117 LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName() 118 << " with type " << *Fn->getFunctionType() << "\n"); 119 addAttributes(FnID, *Fn); 120 121 } else { 122 LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName() 123 << " with type " << *Fn->getFunctionType() << "\n"); 124 } 125 126 assert(Fn && "Failed to create OpenMP runtime function"); 127 128 // Cast the function to the expected type if necessary 129 Constant *C = ConstantExpr::getBitCast(Fn, FnTy->getPointerTo()); 130 return {FnTy, C}; 131 } 132 133 Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) { 134 FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID); 135 auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee()); 136 assert(Fn && "Failed to create OpenMP runtime function pointer"); 137 return Fn; 138 } 139 140 void OpenMPIRBuilder::initialize() { initializeTypes(M); } 141 142 void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) { 143 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet; 144 SmallVector<BasicBlock *, 32> Blocks; 145 SmallVector<OutlineInfo, 16> DeferredOutlines; 146 for (OutlineInfo &OI : OutlineInfos) { 147 // Skip functions that have not finalized yet; may happen with nested 148 // function generation. 149 if (Fn && OI.getFunction() != Fn) { 150 DeferredOutlines.push_back(OI); 151 continue; 152 } 153 154 ParallelRegionBlockSet.clear(); 155 Blocks.clear(); 156 OI.collectBlocks(ParallelRegionBlockSet, Blocks); 157 158 Function *OuterFn = OI.getFunction(); 159 CodeExtractorAnalysisCache CEAC(*OuterFn); 160 CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr, 161 /* AggregateArgs */ false, 162 /* BlockFrequencyInfo */ nullptr, 163 /* BranchProbabilityInfo */ nullptr, 164 /* AssumptionCache */ nullptr, 165 /* AllowVarArgs */ true, 166 /* AllowAlloca */ true, 167 /* Suffix */ ".omp_par"); 168 169 LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n"); 170 LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName() 171 << " Exit: " << OI.ExitBB->getName() << "\n"); 172 assert(Extractor.isEligible() && 173 "Expected OpenMP outlining to be possible!"); 174 175 Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); 176 177 LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n"); 178 LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n"); 179 assert(OutlinedFn->getReturnType()->isVoidTy() && 180 "OpenMP outlined functions should not return a value!"); 181 182 // For compability with the clang CG we move the outlined function after the 183 // one with the parallel region. 184 OutlinedFn->removeFromParent(); 185 M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn); 186 187 // Remove the artificial entry introduced by the extractor right away, we 188 // made our own entry block after all. 189 { 190 BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); 191 assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB); 192 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry); 193 if (AllowExtractorSinking) { 194 // Move instructions from the to-be-deleted ArtificialEntry to the entry 195 // basic block of the parallel region. CodeExtractor may have sunk 196 // allocas/bitcasts for values that are solely used in the outlined 197 // region and do not escape. 198 assert(!ArtificialEntry.empty() && 199 "Expected instructions to sink in the outlined region"); 200 for (BasicBlock::iterator It = ArtificialEntry.begin(), 201 End = ArtificialEntry.end(); 202 It != End;) { 203 Instruction &I = *It; 204 It++; 205 206 if (I.isTerminator()) 207 continue; 208 209 I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt()); 210 } 211 } 212 OI.EntryBB->moveBefore(&ArtificialEntry); 213 ArtificialEntry.eraseFromParent(); 214 } 215 assert(&OutlinedFn->getEntryBlock() == OI.EntryBB); 216 assert(OutlinedFn && OutlinedFn->getNumUses() == 1); 217 218 // Run a user callback, e.g. to add attributes. 219 if (OI.PostOutlineCB) 220 OI.PostOutlineCB(*OutlinedFn); 221 } 222 223 // Remove work items that have been completed. 224 OutlineInfos = std::move(DeferredOutlines); 225 } 226 227 OpenMPIRBuilder::~OpenMPIRBuilder() { 228 assert(OutlineInfos.empty() && "There must be no outstanding outlinings"); 229 } 230 231 Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, 232 IdentFlag LocFlags, 233 unsigned Reserve2Flags) { 234 // Enable "C-mode". 235 LocFlags |= OMP_IDENT_FLAG_KMPC; 236 237 Value *&Ident = 238 IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}]; 239 if (!Ident) { 240 Constant *I32Null = ConstantInt::getNullValue(Int32); 241 Constant *IdentData[] = { 242 I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)), 243 ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr}; 244 Constant *Initializer = ConstantStruct::get( 245 cast<StructType>(IdentPtr->getPointerElementType()), IdentData); 246 247 // Look for existing encoding of the location + flags, not needed but 248 // minimizes the difference to the existing solution while we transition. 249 for (GlobalVariable &GV : M.getGlobalList()) 250 if (GV.getType() == IdentPtr && GV.hasInitializer()) 251 if (GV.getInitializer() == Initializer) 252 return Ident = &GV; 253 254 auto *GV = new GlobalVariable(M, IdentPtr->getPointerElementType(), 255 /* isConstant = */ true, 256 GlobalValue::PrivateLinkage, Initializer); 257 GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 258 GV->setAlignment(Align(8)); 259 Ident = GV; 260 } 261 return Builder.CreatePointerCast(Ident, IdentPtr); 262 } 263 264 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) { 265 Constant *&SrcLocStr = SrcLocStrMap[LocStr]; 266 if (!SrcLocStr) { 267 Constant *Initializer = 268 ConstantDataArray::getString(M.getContext(), LocStr); 269 270 // Look for existing encoding of the location, not needed but minimizes the 271 // difference to the existing solution while we transition. 272 for (GlobalVariable &GV : M.getGlobalList()) 273 if (GV.isConstant() && GV.hasInitializer() && 274 GV.getInitializer() == Initializer) 275 return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr); 276 277 SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "", 278 /* AddressSpace */ 0, &M); 279 } 280 return SrcLocStr; 281 } 282 283 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName, 284 StringRef FileName, 285 unsigned Line, 286 unsigned Column) { 287 SmallString<128> Buffer; 288 Buffer.push_back(';'); 289 Buffer.append(FileName); 290 Buffer.push_back(';'); 291 Buffer.append(FunctionName); 292 Buffer.push_back(';'); 293 Buffer.append(std::to_string(Line)); 294 Buffer.push_back(';'); 295 Buffer.append(std::to_string(Column)); 296 Buffer.push_back(';'); 297 Buffer.push_back(';'); 298 return getOrCreateSrcLocStr(Buffer.str()); 299 } 300 301 Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() { 302 return getOrCreateSrcLocStr(";unknown;unknown;0;0;;"); 303 } 304 305 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, Function *F) { 306 DILocation *DIL = DL.get(); 307 if (!DIL) 308 return getOrCreateDefaultSrcLocStr(); 309 StringRef FileName = M.getName(); 310 if (DIFile *DIF = DIL->getFile()) 311 if (Optional<StringRef> Source = DIF->getSource()) 312 FileName = *Source; 313 StringRef Function = DIL->getScope()->getSubprogram()->getName(); 314 if (Function.empty() && F) 315 Function = F->getName(); 316 return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(), 317 DIL->getColumn()); 318 } 319 320 Constant * 321 OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) { 322 return getOrCreateSrcLocStr(Loc.DL, Loc.IP.getBlock()->getParent()); 323 } 324 325 Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) { 326 return Builder.CreateCall( 327 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident, 328 "omp_global_thread_num"); 329 } 330 331 OpenMPIRBuilder::InsertPointTy 332 OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK, 333 bool ForceSimpleCall, bool CheckCancelFlag) { 334 if (!updateToLocation(Loc)) 335 return Loc.IP; 336 return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag); 337 } 338 339 OpenMPIRBuilder::InsertPointTy 340 OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind, 341 bool ForceSimpleCall, bool CheckCancelFlag) { 342 // Build call __kmpc_cancel_barrier(loc, thread_id) or 343 // __kmpc_barrier(loc, thread_id); 344 345 IdentFlag BarrierLocFlags; 346 switch (Kind) { 347 case OMPD_for: 348 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR; 349 break; 350 case OMPD_sections: 351 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS; 352 break; 353 case OMPD_single: 354 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE; 355 break; 356 case OMPD_barrier: 357 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL; 358 break; 359 default: 360 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL; 361 break; 362 } 363 364 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 365 Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags), 366 getOrCreateThreadID(getOrCreateIdent(SrcLocStr))}; 367 368 // If we are in a cancellable parallel region, barriers are cancellation 369 // points. 370 // TODO: Check why we would force simple calls or to ignore the cancel flag. 371 bool UseCancelBarrier = 372 !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel); 373 374 Value *Result = 375 Builder.CreateCall(getOrCreateRuntimeFunctionPtr( 376 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier 377 : OMPRTL___kmpc_barrier), 378 Args); 379 380 if (UseCancelBarrier && CheckCancelFlag) 381 emitCancelationCheckImpl(Result, OMPD_parallel); 382 383 return Builder.saveIP(); 384 } 385 386 OpenMPIRBuilder::InsertPointTy 387 OpenMPIRBuilder::createCancel(const LocationDescription &Loc, 388 Value *IfCondition, 389 omp::Directive CanceledDirective) { 390 if (!updateToLocation(Loc)) 391 return Loc.IP; 392 393 // LLVM utilities like blocks with terminators. 394 auto *UI = Builder.CreateUnreachable(); 395 396 Instruction *ThenTI = UI, *ElseTI = nullptr; 397 if (IfCondition) 398 SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI); 399 Builder.SetInsertPoint(ThenTI); 400 401 Value *CancelKind = nullptr; 402 switch (CanceledDirective) { 403 #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \ 404 case DirectiveEnum: \ 405 CancelKind = Builder.getInt32(Value); \ 406 break; 407 #include "llvm/Frontend/OpenMP/OMPKinds.def" 408 default: 409 llvm_unreachable("Unknown cancel kind!"); 410 } 411 412 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 413 Value *Ident = getOrCreateIdent(SrcLocStr); 414 Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind}; 415 Value *Result = Builder.CreateCall( 416 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); 417 auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) { 418 if (CanceledDirective == OMPD_parallel) { 419 IRBuilder<>::InsertPointGuard IPG(Builder); 420 Builder.restoreIP(IP); 421 createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), 422 omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, 423 /* CheckCancelFlag */ false); 424 } 425 }; 426 427 // The actual cancel logic is shared with others, e.g., cancel_barriers. 428 emitCancelationCheckImpl(Result, CanceledDirective, ExitCB); 429 430 // Update the insertion point and remove the terminator we introduced. 431 Builder.SetInsertPoint(UI->getParent()); 432 UI->eraseFromParent(); 433 434 return Builder.saveIP(); 435 } 436 437 void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag, 438 omp::Directive CanceledDirective, 439 FinalizeCallbackTy ExitCB) { 440 assert(isLastFinalizationInfoCancellable(CanceledDirective) && 441 "Unexpected cancellation!"); 442 443 // For a cancel barrier we create two new blocks. 444 BasicBlock *BB = Builder.GetInsertBlock(); 445 BasicBlock *NonCancellationBlock; 446 if (Builder.GetInsertPoint() == BB->end()) { 447 // TODO: This branch will not be needed once we moved to the 448 // OpenMPIRBuilder codegen completely. 449 NonCancellationBlock = BasicBlock::Create( 450 BB->getContext(), BB->getName() + ".cont", BB->getParent()); 451 } else { 452 NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint()); 453 BB->getTerminator()->eraseFromParent(); 454 Builder.SetInsertPoint(BB); 455 } 456 BasicBlock *CancellationBlock = BasicBlock::Create( 457 BB->getContext(), BB->getName() + ".cncl", BB->getParent()); 458 459 // Jump to them based on the return value. 460 Value *Cmp = Builder.CreateIsNull(CancelFlag); 461 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock, 462 /* TODO weight */ nullptr, nullptr); 463 464 // From the cancellation block we finalize all variables and go to the 465 // post finalization block that is known to the FiniCB callback. 466 Builder.SetInsertPoint(CancellationBlock); 467 if (ExitCB) 468 ExitCB(Builder.saveIP()); 469 auto &FI = FinalizationStack.back(); 470 FI.FiniCB(Builder.saveIP()); 471 472 // The continuation block is where code generation continues. 473 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin()); 474 } 475 476 IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( 477 const LocationDescription &Loc, InsertPointTy OuterAllocaIP, 478 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, 479 FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, 480 omp::ProcBindKind ProcBind, bool IsCancellable) { 481 if (!updateToLocation(Loc)) 482 return Loc.IP; 483 484 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 485 Value *Ident = getOrCreateIdent(SrcLocStr); 486 Value *ThreadID = getOrCreateThreadID(Ident); 487 488 if (NumThreads) { 489 // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads) 490 Value *Args[] = { 491 Ident, ThreadID, 492 Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)}; 493 Builder.CreateCall( 494 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args); 495 } 496 497 if (ProcBind != OMP_PROC_BIND_default) { 498 // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind) 499 Value *Args[] = { 500 Ident, ThreadID, 501 ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)}; 502 Builder.CreateCall( 503 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args); 504 } 505 506 BasicBlock *InsertBB = Builder.GetInsertBlock(); 507 Function *OuterFn = InsertBB->getParent(); 508 509 // Save the outer alloca block because the insertion iterator may get 510 // invalidated and we still need this later. 511 BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock(); 512 513 // Vector to remember instructions we used only during the modeling but which 514 // we want to delete at the end. 515 SmallVector<Instruction *, 4> ToBeDeleted; 516 517 // Change the location to the outer alloca insertion point to create and 518 // initialize the allocas we pass into the parallel region. 519 Builder.restoreIP(OuterAllocaIP); 520 AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); 521 AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr"); 522 523 // If there is an if condition we actually use the TIDAddr and ZeroAddr in the 524 // program, otherwise we only need them for modeling purposes to get the 525 // associated arguments in the outlined function. In the former case, 526 // initialize the allocas properly, in the latter case, delete them later. 527 if (IfCondition) { 528 Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr); 529 Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr); 530 } else { 531 ToBeDeleted.push_back(TIDAddr); 532 ToBeDeleted.push_back(ZeroAddr); 533 } 534 535 // Create an artificial insertion point that will also ensure the blocks we 536 // are about to split are not degenerated. 537 auto *UI = new UnreachableInst(Builder.getContext(), InsertBB); 538 539 Instruction *ThenTI = UI, *ElseTI = nullptr; 540 if (IfCondition) 541 SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI); 542 543 BasicBlock *ThenBB = ThenTI->getParent(); 544 BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry"); 545 BasicBlock *PRegBodyBB = 546 PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region"); 547 BasicBlock *PRegPreFiniBB = 548 PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize"); 549 BasicBlock *PRegExitBB = 550 PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit"); 551 552 auto FiniCBWrapper = [&](InsertPointTy IP) { 553 // Hide "open-ended" blocks from the given FiniCB by setting the right jump 554 // target to the region exit block. 555 if (IP.getBlock()->end() == IP.getPoint()) { 556 IRBuilder<>::InsertPointGuard IPG(Builder); 557 Builder.restoreIP(IP); 558 Instruction *I = Builder.CreateBr(PRegExitBB); 559 IP = InsertPointTy(I->getParent(), I->getIterator()); 560 } 561 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 && 562 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB && 563 "Unexpected insertion point for finalization call!"); 564 return FiniCB(IP); 565 }; 566 567 FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); 568 569 // Generate the privatization allocas in the block that will become the entry 570 // of the outlined function. 571 Builder.SetInsertPoint(PRegEntryBB->getTerminator()); 572 InsertPointTy InnerAllocaIP = Builder.saveIP(); 573 574 AllocaInst *PrivTIDAddr = 575 Builder.CreateAlloca(Int32, nullptr, "tid.addr.local"); 576 Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr, "tid"); 577 578 // Add some fake uses for OpenMP provided arguments. 579 ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use")); 580 Instruction *ZeroAddrUse = Builder.CreateLoad(Int32, ZeroAddr, 581 "zero.addr.use"); 582 ToBeDeleted.push_back(ZeroAddrUse); 583 584 // ThenBB 585 // | 586 // V 587 // PRegionEntryBB <- Privatization allocas are placed here. 588 // | 589 // V 590 // PRegionBodyBB <- BodeGen is invoked here. 591 // | 592 // V 593 // PRegPreFiniBB <- The block we will start finalization from. 594 // | 595 // V 596 // PRegionExitBB <- A common exit to simplify block collection. 597 // 598 599 LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n"); 600 601 // Let the caller create the body. 602 assert(BodyGenCB && "Expected body generation callback!"); 603 InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin()); 604 BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB); 605 606 LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n"); 607 608 FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call); 609 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 610 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 611 llvm::LLVMContext &Ctx = F->getContext(); 612 MDBuilder MDB(Ctx); 613 // Annotate the callback behavior of the __kmpc_fork_call: 614 // - The callback callee is argument number 2 (microtask). 615 // - The first two arguments of the callback callee are unknown (-1). 616 // - All variadic arguments to the __kmpc_fork_call are passed to the 617 // callback callee. 618 F->addMetadata( 619 llvm::LLVMContext::MD_callback, 620 *llvm::MDNode::get( 621 Ctx, {MDB.createCallbackEncoding(2, {-1, -1}, 622 /* VarArgsArePassed */ true)})); 623 } 624 } 625 626 OutlineInfo OI; 627 OI.PostOutlineCB = [=](Function &OutlinedFn) { 628 // Add some known attributes. 629 OutlinedFn.addParamAttr(0, Attribute::NoAlias); 630 OutlinedFn.addParamAttr(1, Attribute::NoAlias); 631 OutlinedFn.addFnAttr(Attribute::NoUnwind); 632 OutlinedFn.addFnAttr(Attribute::NoRecurse); 633 634 assert(OutlinedFn.arg_size() >= 2 && 635 "Expected at least tid and bounded tid as arguments"); 636 unsigned NumCapturedVars = 637 OutlinedFn.arg_size() - /* tid & bounded tid */ 2; 638 639 CallInst *CI = cast<CallInst>(OutlinedFn.user_back()); 640 CI->getParent()->setName("omp_parallel"); 641 Builder.SetInsertPoint(CI); 642 643 // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn); 644 Value *ForkCallArgs[] = { 645 Ident, Builder.getInt32(NumCapturedVars), 646 Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)}; 647 648 SmallVector<Value *, 16> RealArgs; 649 RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs)); 650 RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end()); 651 652 Builder.CreateCall(RTLFn, RealArgs); 653 654 LLVM_DEBUG(dbgs() << "With fork_call placed: " 655 << *Builder.GetInsertBlock()->getParent() << "\n"); 656 657 InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end()); 658 659 // Initialize the local TID stack location with the argument value. 660 Builder.SetInsertPoint(PrivTID); 661 Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin(); 662 Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr); 663 664 // If no "if" clause was present we do not need the call created during 665 // outlining, otherwise we reuse it in the serialized parallel region. 666 if (!ElseTI) { 667 CI->eraseFromParent(); 668 } else { 669 670 // If an "if" clause was present we are now generating the serialized 671 // version into the "else" branch. 672 Builder.SetInsertPoint(ElseTI); 673 674 // Build calls __kmpc_serialized_parallel(&Ident, GTid); 675 Value *SerializedParallelCallArgs[] = {Ident, ThreadID}; 676 Builder.CreateCall( 677 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel), 678 SerializedParallelCallArgs); 679 680 // OutlinedFn(>id, &zero, CapturedStruct); 681 CI->removeFromParent(); 682 Builder.Insert(CI); 683 684 // __kmpc_end_serialized_parallel(&Ident, GTid); 685 Value *EndArgs[] = {Ident, ThreadID}; 686 Builder.CreateCall( 687 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel), 688 EndArgs); 689 690 LLVM_DEBUG(dbgs() << "With serialized parallel region: " 691 << *Builder.GetInsertBlock()->getParent() << "\n"); 692 } 693 694 for (Instruction *I : ToBeDeleted) 695 I->eraseFromParent(); 696 }; 697 698 // Adjust the finalization stack, verify the adjustment, and call the 699 // finalize function a last time to finalize values between the pre-fini 700 // block and the exit block if we left the parallel "the normal way". 701 auto FiniInfo = FinalizationStack.pop_back_val(); 702 (void)FiniInfo; 703 assert(FiniInfo.DK == OMPD_parallel && 704 "Unexpected finalization stack state!"); 705 706 Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator(); 707 708 InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); 709 FiniCB(PreFiniIP); 710 711 OI.EntryBB = PRegEntryBB; 712 OI.ExitBB = PRegExitBB; 713 714 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet; 715 SmallVector<BasicBlock *, 32> Blocks; 716 OI.collectBlocks(ParallelRegionBlockSet, Blocks); 717 718 // Ensure a single exit node for the outlined region by creating one. 719 // We might have multiple incoming edges to the exit now due to finalizations, 720 // e.g., cancel calls that cause the control flow to leave the region. 721 BasicBlock *PRegOutlinedExitBB = PRegExitBB; 722 PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt()); 723 PRegOutlinedExitBB->setName("omp.par.outlined.exit"); 724 Blocks.push_back(PRegOutlinedExitBB); 725 726 CodeExtractorAnalysisCache CEAC(*OuterFn); 727 CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr, 728 /* AggregateArgs */ false, 729 /* BlockFrequencyInfo */ nullptr, 730 /* BranchProbabilityInfo */ nullptr, 731 /* AssumptionCache */ nullptr, 732 /* AllowVarArgs */ true, 733 /* AllowAlloca */ true, 734 /* Suffix */ ".omp_par"); 735 736 // Find inputs to, outputs from the code region. 737 BasicBlock *CommonExit = nullptr; 738 SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands; 739 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); 740 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands); 741 742 LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n"); 743 744 FunctionCallee TIDRTLFn = 745 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num); 746 747 auto PrivHelper = [&](Value &V) { 748 if (&V == TIDAddr || &V == ZeroAddr) 749 return; 750 751 SetVector<Use *> Uses; 752 for (Use &U : V.uses()) 753 if (auto *UserI = dyn_cast<Instruction>(U.getUser())) 754 if (ParallelRegionBlockSet.count(UserI->getParent())) 755 Uses.insert(&U); 756 757 // __kmpc_fork_call expects extra arguments as pointers. If the input 758 // already has a pointer type, everything is fine. Otherwise, store the 759 // value onto stack and load it back inside the to-be-outlined region. This 760 // will ensure only the pointer will be passed to the function. 761 // FIXME: if there are more than 15 trailing arguments, they must be 762 // additionally packed in a struct. 763 Value *Inner = &V; 764 if (!V.getType()->isPointerTy()) { 765 IRBuilder<>::InsertPointGuard Guard(Builder); 766 LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n"); 767 768 Builder.restoreIP(OuterAllocaIP); 769 Value *Ptr = 770 Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded"); 771 772 // Store to stack at end of the block that currently branches to the entry 773 // block of the to-be-outlined region. 774 Builder.SetInsertPoint(InsertBB, 775 InsertBB->getTerminator()->getIterator()); 776 Builder.CreateStore(&V, Ptr); 777 778 // Load back next to allocations in the to-be-outlined region. 779 Builder.restoreIP(InnerAllocaIP); 780 Inner = Builder.CreateLoad(V.getType(), Ptr); 781 } 782 783 Value *ReplacementValue = nullptr; 784 CallInst *CI = dyn_cast<CallInst>(&V); 785 if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) { 786 ReplacementValue = PrivTID; 787 } else { 788 Builder.restoreIP( 789 PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue)); 790 assert(ReplacementValue && 791 "Expected copy/create callback to set replacement value!"); 792 if (ReplacementValue == &V) 793 return; 794 } 795 796 for (Use *UPtr : Uses) 797 UPtr->set(ReplacementValue); 798 }; 799 800 // Reset the inner alloca insertion as it will be used for loading the values 801 // wrapped into pointers before passing them into the to-be-outlined region. 802 // Configure it to insert immediately after the fake use of zero address so 803 // that they are available in the generated body and so that the 804 // OpenMP-related values (thread ID and zero address pointers) remain leading 805 // in the argument list. 806 InnerAllocaIP = IRBuilder<>::InsertPoint( 807 ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator()); 808 809 // Reset the outer alloca insertion point to the entry of the relevant block 810 // in case it was invalidated. 811 OuterAllocaIP = IRBuilder<>::InsertPoint( 812 OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt()); 813 814 for (Value *Input : Inputs) { 815 LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n"); 816 PrivHelper(*Input); 817 } 818 LLVM_DEBUG({ 819 for (Value *Output : Outputs) 820 LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n"); 821 }); 822 assert(Outputs.empty() && 823 "OpenMP outlining should not produce live-out values!"); 824 825 LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n"); 826 LLVM_DEBUG({ 827 for (auto *BB : Blocks) 828 dbgs() << " PBR: " << BB->getName() << "\n"; 829 }); 830 831 // Register the outlined info. 832 addOutlineInfo(std::move(OI)); 833 834 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end()); 835 UI->eraseFromParent(); 836 837 return AfterIP; 838 } 839 840 void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) { 841 // Build call void __kmpc_flush(ident_t *loc) 842 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 843 Value *Args[] = {getOrCreateIdent(SrcLocStr)}; 844 845 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args); 846 } 847 848 void OpenMPIRBuilder::createFlush(const LocationDescription &Loc) { 849 if (!updateToLocation(Loc)) 850 return; 851 emitFlush(Loc); 852 } 853 854 void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) { 855 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 856 // global_tid); 857 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 858 Value *Ident = getOrCreateIdent(SrcLocStr); 859 Value *Args[] = {Ident, getOrCreateThreadID(Ident)}; 860 861 // Ignore return result until untied tasks are supported. 862 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait), 863 Args); 864 } 865 866 void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc) { 867 if (!updateToLocation(Loc)) 868 return; 869 emitTaskwaitImpl(Loc); 870 } 871 872 void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) { 873 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 874 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 875 Value *Ident = getOrCreateIdent(SrcLocStr); 876 Constant *I32Null = ConstantInt::getNullValue(Int32); 877 Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null}; 878 879 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield), 880 Args); 881 } 882 883 void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) { 884 if (!updateToLocation(Loc)) 885 return; 886 emitTaskyieldImpl(Loc); 887 } 888 889 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( 890 const LocationDescription &Loc, InsertPointTy AllocaIP, 891 ArrayRef<StorableBodyGenCallbackTy> SectionCBs, PrivatizeCallbackTy PrivCB, 892 FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) { 893 if (!updateToLocation(Loc)) 894 return Loc.IP; 895 896 auto FiniCBWrapper = [&](InsertPointTy IP) { 897 if (IP.getBlock()->end() != IP.getPoint()) 898 return FiniCB(IP); 899 // This must be done otherwise any nested constructs using FinalizeOMPRegion 900 // will fail because that function requires the Finalization Basic Block to 901 // have a terminator, which is already removed by EmitOMPRegionBody. 902 // IP is currently at cancelation block. 903 // We need to backtrack to the condition block to fetch 904 // the exit block and create a branch from cancelation 905 // to exit block. 906 IRBuilder<>::InsertPointGuard IPG(Builder); 907 Builder.restoreIP(IP); 908 auto *CaseBB = IP.getBlock()->getSinglePredecessor(); 909 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor(); 910 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1); 911 Instruction *I = Builder.CreateBr(ExitBB); 912 IP = InsertPointTy(I->getParent(), I->getIterator()); 913 return FiniCB(IP); 914 }; 915 916 FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable}); 917 918 // Each section is emitted as a switch case 919 // Each finalization callback is handled from clang.EmitOMPSectionDirective() 920 // -> OMP.createSection() which generates the IR for each section 921 // Iterate through all sections and emit a switch construct: 922 // switch (IV) { 923 // case 0: 924 // <SectionStmt[0]>; 925 // break; 926 // ... 927 // case <NumSection> - 1: 928 // <SectionStmt[<NumSection> - 1]>; 929 // break; 930 // } 931 // ... 932 // section_loop.after: 933 // <FiniCB>; 934 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) { 935 auto *CurFn = CodeGenIP.getBlock()->getParent(); 936 auto *ForIncBB = CodeGenIP.getBlock()->getSingleSuccessor(); 937 auto *ForExitBB = CodeGenIP.getBlock() 938 ->getSinglePredecessor() 939 ->getTerminator() 940 ->getSuccessor(1); 941 SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, ForIncBB); 942 Builder.restoreIP(CodeGenIP); 943 unsigned CaseNumber = 0; 944 for (auto SectionCB : SectionCBs) { 945 auto *CaseBB = BasicBlock::Create(M.getContext(), 946 "omp_section_loop.body.case", CurFn); 947 SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB); 948 Builder.SetInsertPoint(CaseBB); 949 SectionCB(InsertPointTy(), Builder.saveIP(), *ForExitBB); 950 CaseNumber++; 951 } 952 // remove the existing terminator from body BB since there can be no 953 // terminators after switch/case 954 CodeGenIP.getBlock()->getTerminator()->eraseFromParent(); 955 }; 956 // Loop body ends here 957 // LowerBound, UpperBound, and STride for createCanonicalLoop 958 Type *I32Ty = Type::getInt32Ty(M.getContext()); 959 Value *LB = ConstantInt::get(I32Ty, 0); 960 Value *UB = ConstantInt::get(I32Ty, SectionCBs.size()); 961 Value *ST = ConstantInt::get(I32Ty, 1); 962 llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop( 963 Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop"); 964 InsertPointTy AfterIP = 965 applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, true); 966 BasicBlock *LoopAfterBB = AfterIP.getBlock(); 967 Instruction *SplitPos = LoopAfterBB->getTerminator(); 968 if (!isa_and_nonnull<BranchInst>(SplitPos)) 969 SplitPos = new UnreachableInst(Builder.getContext(), LoopAfterBB); 970 // ExitBB after LoopAfterBB because LoopAfterBB is used for FinalizationCB, 971 // which requires a BB with branch 972 BasicBlock *ExitBB = 973 LoopAfterBB->splitBasicBlock(SplitPos, "omp_sections.end"); 974 SplitPos->eraseFromParent(); 975 976 // Apply the finalization callback in LoopAfterBB 977 auto FiniInfo = FinalizationStack.pop_back_val(); 978 assert(FiniInfo.DK == OMPD_sections && 979 "Unexpected finalization stack state!"); 980 Builder.SetInsertPoint(LoopAfterBB->getTerminator()); 981 FiniInfo.FiniCB(Builder.saveIP()); 982 Builder.SetInsertPoint(ExitBB); 983 984 return Builder.saveIP(); 985 } 986 987 OpenMPIRBuilder::InsertPointTy 988 OpenMPIRBuilder::createSection(const LocationDescription &Loc, 989 BodyGenCallbackTy BodyGenCB, 990 FinalizeCallbackTy FiniCB) { 991 if (!updateToLocation(Loc)) 992 return Loc.IP; 993 994 auto FiniCBWrapper = [&](InsertPointTy IP) { 995 if (IP.getBlock()->end() != IP.getPoint()) 996 return FiniCB(IP); 997 // This must be done otherwise any nested constructs using FinalizeOMPRegion 998 // will fail because that function requires the Finalization Basic Block to 999 // have a terminator, which is already removed by EmitOMPRegionBody. 1000 // IP is currently at cancelation block. 1001 // We need to backtrack to the condition block to fetch 1002 // the exit block and create a branch from cancelation 1003 // to exit block. 1004 IRBuilder<>::InsertPointGuard IPG(Builder); 1005 Builder.restoreIP(IP); 1006 auto *CaseBB = Loc.IP.getBlock(); 1007 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor(); 1008 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1); 1009 Instruction *I = Builder.CreateBr(ExitBB); 1010 IP = InsertPointTy(I->getParent(), I->getIterator()); 1011 return FiniCB(IP); 1012 }; 1013 1014 Directive OMPD = Directive::OMPD_sections; 1015 // Since we are using Finalization Callback here, HasFinalize 1016 // and IsCancellable have to be true 1017 return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper, 1018 /*Conditional*/ false, /*hasFinalize*/ true, 1019 /*IsCancellable*/ true); 1020 } 1021 1022 /// Create a function with a unique name and a "void (i8*, i8*)" signature in 1023 /// the given module and return it. 1024 Function *getFreshReductionFunc(Module &M) { 1025 Type *VoidTy = Type::getVoidTy(M.getContext()); 1026 Type *Int8PtrTy = Type::getInt8PtrTy(M.getContext()); 1027 auto *FuncTy = 1028 FunctionType::get(VoidTy, {Int8PtrTy, Int8PtrTy}, /* IsVarArg */ false); 1029 return Function::Create(FuncTy, GlobalVariable::InternalLinkage, 1030 M.getDataLayout().getDefaultGlobalsAddressSpace(), 1031 ".omp.reduction.func", &M); 1032 } 1033 1034 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions( 1035 const LocationDescription &Loc, InsertPointTy AllocaIP, 1036 ArrayRef<ReductionInfo> ReductionInfos, bool IsNoWait) { 1037 for (const ReductionInfo &RI : ReductionInfos) { 1038 (void)RI; 1039 assert(RI.Variable && "expected non-null variable"); 1040 assert(RI.PrivateVariable && "expected non-null private variable"); 1041 assert(RI.ReductionGen && "expected non-null reduction generator callback"); 1042 assert(RI.Variable->getType() == RI.PrivateVariable->getType() && 1043 "expected variables and their private equivalents to have the same " 1044 "type"); 1045 assert(RI.Variable->getType()->isPointerTy() && 1046 "expected variables to be pointers"); 1047 } 1048 1049 if (!updateToLocation(Loc)) 1050 return InsertPointTy(); 1051 1052 BasicBlock *InsertBlock = Loc.IP.getBlock(); 1053 BasicBlock *ContinuationBlock = 1054 InsertBlock->splitBasicBlock(Loc.IP.getPoint(), "reduce.finalize"); 1055 InsertBlock->getTerminator()->eraseFromParent(); 1056 1057 // Create and populate array of type-erased pointers to private reduction 1058 // values. 1059 unsigned NumReductions = ReductionInfos.size(); 1060 Type *RedArrayTy = ArrayType::get(Builder.getInt8PtrTy(), NumReductions); 1061 Builder.restoreIP(AllocaIP); 1062 Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array"); 1063 1064 Builder.SetInsertPoint(InsertBlock, InsertBlock->end()); 1065 1066 for (auto En : enumerate(ReductionInfos)) { 1067 unsigned Index = En.index(); 1068 const ReductionInfo &RI = En.value(); 1069 Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64( 1070 RedArrayTy, RedArray, 0, Index, "red.array.elem." + Twine(Index)); 1071 Value *Casted = 1072 Builder.CreateBitCast(RI.PrivateVariable, Builder.getInt8PtrTy(), 1073 "private.red.var." + Twine(Index) + ".casted"); 1074 Builder.CreateStore(Casted, RedArrayElemPtr); 1075 } 1076 1077 // Emit a call to the runtime function that orchestrates the reduction. 1078 // Declare the reduction function in the process. 1079 Function *Func = Builder.GetInsertBlock()->getParent(); 1080 Module *Module = Func->getParent(); 1081 Value *RedArrayPtr = 1082 Builder.CreateBitCast(RedArray, Builder.getInt8PtrTy(), "red.array.ptr"); 1083 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 1084 bool CanGenerateAtomic = 1085 llvm::all_of(ReductionInfos, [](const ReductionInfo &RI) { 1086 return RI.AtomicReductionGen; 1087 }); 1088 Value *Ident = getOrCreateIdent( 1089 SrcLocStr, CanGenerateAtomic ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE 1090 : IdentFlag(0)); 1091 Value *ThreadId = getOrCreateThreadID(Ident); 1092 Constant *NumVariables = Builder.getInt32(NumReductions); 1093 const DataLayout &DL = Module->getDataLayout(); 1094 unsigned RedArrayByteSize = DL.getTypeStoreSize(RedArrayTy); 1095 Constant *RedArraySize = Builder.getInt64(RedArrayByteSize); 1096 Function *ReductionFunc = getFreshReductionFunc(*Module); 1097 Value *Lock = getOMPCriticalRegionLock(".reduction"); 1098 Function *ReduceFunc = getOrCreateRuntimeFunctionPtr( 1099 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait 1100 : RuntimeFunction::OMPRTL___kmpc_reduce); 1101 CallInst *ReduceCall = 1102 Builder.CreateCall(ReduceFunc, 1103 {Ident, ThreadId, NumVariables, RedArraySize, 1104 RedArrayPtr, ReductionFunc, Lock}, 1105 "reduce"); 1106 1107 // Create final reduction entry blocks for the atomic and non-atomic case. 1108 // Emit IR that dispatches control flow to one of the blocks based on the 1109 // reduction supporting the atomic mode. 1110 BasicBlock *NonAtomicRedBlock = 1111 BasicBlock::Create(Module->getContext(), "reduce.switch.nonatomic", Func); 1112 BasicBlock *AtomicRedBlock = 1113 BasicBlock::Create(Module->getContext(), "reduce.switch.atomic", Func); 1114 SwitchInst *Switch = 1115 Builder.CreateSwitch(ReduceCall, ContinuationBlock, /* NumCases */ 2); 1116 Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock); 1117 Switch->addCase(Builder.getInt32(2), AtomicRedBlock); 1118 1119 // Populate the non-atomic reduction using the elementwise reduction function. 1120 // This loads the elements from the global and private variables and reduces 1121 // them before storing back the result to the global variable. 1122 Builder.SetInsertPoint(NonAtomicRedBlock); 1123 for (auto En : enumerate(ReductionInfos)) { 1124 const ReductionInfo &RI = En.value(); 1125 Type *ValueType = RI.getElementType(); 1126 Value *RedValue = Builder.CreateLoad(ValueType, RI.Variable, 1127 "red.value." + Twine(En.index())); 1128 Value *PrivateRedValue = 1129 Builder.CreateLoad(ValueType, RI.PrivateVariable, 1130 "red.private.value." + Twine(En.index())); 1131 Value *Reduced; 1132 Builder.restoreIP( 1133 RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced)); 1134 if (!Builder.GetInsertBlock()) 1135 return InsertPointTy(); 1136 Builder.CreateStore(Reduced, RI.Variable); 1137 } 1138 Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr( 1139 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait 1140 : RuntimeFunction::OMPRTL___kmpc_end_reduce); 1141 Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock}); 1142 Builder.CreateBr(ContinuationBlock); 1143 1144 // Populate the atomic reduction using the atomic elementwise reduction 1145 // function. There are no loads/stores here because they will be happening 1146 // inside the atomic elementwise reduction. 1147 Builder.SetInsertPoint(AtomicRedBlock); 1148 if (CanGenerateAtomic) { 1149 for (const ReductionInfo &RI : ReductionInfos) { 1150 Builder.restoreIP(RI.AtomicReductionGen(Builder.saveIP(), RI.Variable, 1151 RI.PrivateVariable)); 1152 if (!Builder.GetInsertBlock()) 1153 return InsertPointTy(); 1154 } 1155 Builder.CreateBr(ContinuationBlock); 1156 } else { 1157 Builder.CreateUnreachable(); 1158 } 1159 1160 // Populate the outlined reduction function using the elementwise reduction 1161 // function. Partial values are extracted from the type-erased array of 1162 // pointers to private variables. 1163 BasicBlock *ReductionFuncBlock = 1164 BasicBlock::Create(Module->getContext(), "", ReductionFunc); 1165 Builder.SetInsertPoint(ReductionFuncBlock); 1166 Value *LHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(0), 1167 RedArrayTy->getPointerTo()); 1168 Value *RHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(1), 1169 RedArrayTy->getPointerTo()); 1170 for (auto En : enumerate(ReductionInfos)) { 1171 const ReductionInfo &RI = En.value(); 1172 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64( 1173 RedArrayTy, LHSArrayPtr, 0, En.index()); 1174 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), LHSI8PtrPtr); 1175 Value *LHSPtr = Builder.CreateBitCast(LHSI8Ptr, RI.Variable->getType()); 1176 Value *LHS = Builder.CreateLoad(RI.getElementType(), LHSPtr); 1177 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64( 1178 RedArrayTy, RHSArrayPtr, 0, En.index()); 1179 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), RHSI8PtrPtr); 1180 Value *RHSPtr = 1181 Builder.CreateBitCast(RHSI8Ptr, RI.PrivateVariable->getType()); 1182 Value *RHS = Builder.CreateLoad(RI.getElementType(), RHSPtr); 1183 Value *Reduced; 1184 Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced)); 1185 if (!Builder.GetInsertBlock()) 1186 return InsertPointTy(); 1187 Builder.CreateStore(Reduced, LHSPtr); 1188 } 1189 Builder.CreateRetVoid(); 1190 1191 Builder.SetInsertPoint(ContinuationBlock); 1192 return Builder.saveIP(); 1193 } 1194 1195 OpenMPIRBuilder::InsertPointTy 1196 OpenMPIRBuilder::createMaster(const LocationDescription &Loc, 1197 BodyGenCallbackTy BodyGenCB, 1198 FinalizeCallbackTy FiniCB) { 1199 1200 if (!updateToLocation(Loc)) 1201 return Loc.IP; 1202 1203 Directive OMPD = Directive::OMPD_master; 1204 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 1205 Value *Ident = getOrCreateIdent(SrcLocStr); 1206 Value *ThreadId = getOrCreateThreadID(Ident); 1207 Value *Args[] = {Ident, ThreadId}; 1208 1209 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master); 1210 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args); 1211 1212 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master); 1213 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); 1214 1215 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, 1216 /*Conditional*/ true, /*hasFinalize*/ true); 1217 } 1218 1219 OpenMPIRBuilder::InsertPointTy 1220 OpenMPIRBuilder::createMasked(const LocationDescription &Loc, 1221 BodyGenCallbackTy BodyGenCB, 1222 FinalizeCallbackTy FiniCB, Value *Filter) { 1223 if (!updateToLocation(Loc)) 1224 return Loc.IP; 1225 1226 Directive OMPD = Directive::OMPD_masked; 1227 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 1228 Value *Ident = getOrCreateIdent(SrcLocStr); 1229 Value *ThreadId = getOrCreateThreadID(Ident); 1230 Value *Args[] = {Ident, ThreadId, Filter}; 1231 Value *ArgsEnd[] = {Ident, ThreadId}; 1232 1233 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked); 1234 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args); 1235 1236 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked); 1237 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd); 1238 1239 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, 1240 /*Conditional*/ true, /*hasFinalize*/ true); 1241 } 1242 1243 CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton( 1244 DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, 1245 BasicBlock *PostInsertBefore, const Twine &Name) { 1246 Module *M = F->getParent(); 1247 LLVMContext &Ctx = M->getContext(); 1248 Type *IndVarTy = TripCount->getType(); 1249 1250 // Create the basic block structure. 1251 BasicBlock *Preheader = 1252 BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore); 1253 BasicBlock *Header = 1254 BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore); 1255 BasicBlock *Cond = 1256 BasicBlock::Create(Ctx, "omp_" + Name + ".cond", F, PreInsertBefore); 1257 BasicBlock *Body = 1258 BasicBlock::Create(Ctx, "omp_" + Name + ".body", F, PreInsertBefore); 1259 BasicBlock *Latch = 1260 BasicBlock::Create(Ctx, "omp_" + Name + ".inc", F, PostInsertBefore); 1261 BasicBlock *Exit = 1262 BasicBlock::Create(Ctx, "omp_" + Name + ".exit", F, PostInsertBefore); 1263 BasicBlock *After = 1264 BasicBlock::Create(Ctx, "omp_" + Name + ".after", F, PostInsertBefore); 1265 1266 // Use specified DebugLoc for new instructions. 1267 Builder.SetCurrentDebugLocation(DL); 1268 1269 Builder.SetInsertPoint(Preheader); 1270 Builder.CreateBr(Header); 1271 1272 Builder.SetInsertPoint(Header); 1273 PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv"); 1274 IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader); 1275 Builder.CreateBr(Cond); 1276 1277 Builder.SetInsertPoint(Cond); 1278 Value *Cmp = 1279 Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp"); 1280 Builder.CreateCondBr(Cmp, Body, Exit); 1281 1282 Builder.SetInsertPoint(Body); 1283 Builder.CreateBr(Latch); 1284 1285 Builder.SetInsertPoint(Latch); 1286 Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1), 1287 "omp_" + Name + ".next", /*HasNUW=*/true); 1288 Builder.CreateBr(Header); 1289 IndVarPHI->addIncoming(Next, Latch); 1290 1291 Builder.SetInsertPoint(Exit); 1292 Builder.CreateBr(After); 1293 1294 // Remember and return the canonical control flow. 1295 LoopInfos.emplace_front(); 1296 CanonicalLoopInfo *CL = &LoopInfos.front(); 1297 1298 CL->Preheader = Preheader; 1299 CL->Header = Header; 1300 CL->Cond = Cond; 1301 CL->Body = Body; 1302 CL->Latch = Latch; 1303 CL->Exit = Exit; 1304 CL->After = After; 1305 1306 #ifndef NDEBUG 1307 CL->assertOK(); 1308 #endif 1309 return CL; 1310 } 1311 1312 CanonicalLoopInfo * 1313 OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, 1314 LoopBodyGenCallbackTy BodyGenCB, 1315 Value *TripCount, const Twine &Name) { 1316 BasicBlock *BB = Loc.IP.getBlock(); 1317 BasicBlock *NextBB = BB->getNextNode(); 1318 1319 CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(), 1320 NextBB, NextBB, Name); 1321 BasicBlock *After = CL->getAfter(); 1322 1323 // If location is not set, don't connect the loop. 1324 if (updateToLocation(Loc)) { 1325 // Split the loop at the insertion point: Branch to the preheader and move 1326 // every following instruction to after the loop (the After BB). Also, the 1327 // new successor is the loop's after block. 1328 Builder.CreateBr(CL->Preheader); 1329 After->getInstList().splice(After->begin(), BB->getInstList(), 1330 Builder.GetInsertPoint(), BB->end()); 1331 After->replaceSuccessorsPhiUsesWith(BB, After); 1332 } 1333 1334 // Emit the body content. We do it after connecting the loop to the CFG to 1335 // avoid that the callback encounters degenerate BBs. 1336 BodyGenCB(CL->getBodyIP(), CL->getIndVar()); 1337 1338 #ifndef NDEBUG 1339 CL->assertOK(); 1340 #endif 1341 return CL; 1342 } 1343 1344 CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop( 1345 const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, 1346 Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, 1347 InsertPointTy ComputeIP, const Twine &Name) { 1348 1349 // Consider the following difficulties (assuming 8-bit signed integers): 1350 // * Adding \p Step to the loop counter which passes \p Stop may overflow: 1351 // DO I = 1, 100, 50 1352 /// * A \p Step of INT_MIN cannot not be normalized to a positive direction: 1353 // DO I = 100, 0, -128 1354 1355 // Start, Stop and Step must be of the same integer type. 1356 auto *IndVarTy = cast<IntegerType>(Start->getType()); 1357 assert(IndVarTy == Stop->getType() && "Stop type mismatch"); 1358 assert(IndVarTy == Step->getType() && "Step type mismatch"); 1359 1360 LocationDescription ComputeLoc = 1361 ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc; 1362 updateToLocation(ComputeLoc); 1363 1364 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0); 1365 ConstantInt *One = ConstantInt::get(IndVarTy, 1); 1366 1367 // Like Step, but always positive. 1368 Value *Incr = Step; 1369 1370 // Distance between Start and Stop; always positive. 1371 Value *Span; 1372 1373 // Condition whether there are no iterations are executed at all, e.g. because 1374 // UB < LB. 1375 Value *ZeroCmp; 1376 1377 if (IsSigned) { 1378 // Ensure that increment is positive. If not, negate and invert LB and UB. 1379 Value *IsNeg = Builder.CreateICmpSLT(Step, Zero); 1380 Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step); 1381 Value *LB = Builder.CreateSelect(IsNeg, Stop, Start); 1382 Value *UB = Builder.CreateSelect(IsNeg, Start, Stop); 1383 Span = Builder.CreateSub(UB, LB, "", false, true); 1384 ZeroCmp = Builder.CreateICmp( 1385 InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB); 1386 } else { 1387 Span = Builder.CreateSub(Stop, Start, "", true); 1388 ZeroCmp = Builder.CreateICmp( 1389 InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start); 1390 } 1391 1392 Value *CountIfLooping; 1393 if (InclusiveStop) { 1394 CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One); 1395 } else { 1396 // Avoid incrementing past stop since it could overflow. 1397 Value *CountIfTwo = Builder.CreateAdd( 1398 Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One); 1399 Value *OneCmp = Builder.CreateICmp( 1400 InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr); 1401 CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo); 1402 } 1403 Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping, 1404 "omp_" + Name + ".tripcount"); 1405 1406 auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) { 1407 Builder.restoreIP(CodeGenIP); 1408 Value *Span = Builder.CreateMul(IV, Step); 1409 Value *IndVar = Builder.CreateAdd(Span, Start); 1410 BodyGenCB(Builder.saveIP(), IndVar); 1411 }; 1412 LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP(); 1413 return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name); 1414 } 1415 1416 // Returns an LLVM function to call for initializing loop bounds using OpenMP 1417 // static scheduling depending on `type`. Only i32 and i64 are supported by the 1418 // runtime. Always interpret integers as unsigned similarly to 1419 // CanonicalLoopInfo. 1420 static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, 1421 OpenMPIRBuilder &OMPBuilder) { 1422 unsigned Bitwidth = Ty->getIntegerBitWidth(); 1423 if (Bitwidth == 32) 1424 return OMPBuilder.getOrCreateRuntimeFunction( 1425 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u); 1426 if (Bitwidth == 64) 1427 return OMPBuilder.getOrCreateRuntimeFunction( 1428 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u); 1429 llvm_unreachable("unknown OpenMP loop iterator bitwidth"); 1430 } 1431 1432 // Sets the number of loop iterations to the given value. This value must be 1433 // valid in the condition block (i.e., defined in the preheader) and is 1434 // interpreted as an unsigned integer. 1435 void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount) { 1436 Instruction *CmpI = &CLI->getCond()->front(); 1437 assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount"); 1438 CmpI->setOperand(1, TripCount); 1439 CLI->assertOK(); 1440 } 1441 1442 OpenMPIRBuilder::InsertPointTy 1443 OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, 1444 InsertPointTy AllocaIP, 1445 bool NeedsBarrier, Value *Chunk) { 1446 assert(CLI->isValid() && "Requires a valid canonical loop"); 1447 1448 // Set up the source location value for OpenMP runtime. 1449 Builder.restoreIP(CLI->getPreheaderIP()); 1450 Builder.SetCurrentDebugLocation(DL); 1451 1452 Constant *SrcLocStr = getOrCreateSrcLocStr(DL); 1453 Value *SrcLoc = getOrCreateIdent(SrcLocStr); 1454 1455 // Declare useful OpenMP runtime functions. 1456 Value *IV = CLI->getIndVar(); 1457 Type *IVTy = IV->getType(); 1458 FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this); 1459 FunctionCallee StaticFini = 1460 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini); 1461 1462 // Allocate space for computed loop bounds as expected by the "init" function. 1463 Builder.restoreIP(AllocaIP); 1464 Type *I32Type = Type::getInt32Ty(M.getContext()); 1465 Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter"); 1466 Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound"); 1467 Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound"); 1468 Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride"); 1469 1470 // At the end of the preheader, prepare for calling the "init" function by 1471 // storing the current loop bounds into the allocated space. A canonical loop 1472 // always iterates from 0 to trip-count with step 1. Note that "init" expects 1473 // and produces an inclusive upper bound. 1474 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator()); 1475 Constant *Zero = ConstantInt::get(IVTy, 0); 1476 Constant *One = ConstantInt::get(IVTy, 1); 1477 Builder.CreateStore(Zero, PLowerBound); 1478 Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One); 1479 Builder.CreateStore(UpperBound, PUpperBound); 1480 Builder.CreateStore(One, PStride); 1481 1482 // FIXME: schedule(static) is NOT the same as schedule(static,1) 1483 if (!Chunk) 1484 Chunk = One; 1485 1486 Value *ThreadNum = getOrCreateThreadID(SrcLoc); 1487 1488 Constant *SchedulingType = 1489 ConstantInt::get(I32Type, static_cast<int>(OMPScheduleType::Static)); 1490 1491 // Call the "init" function and update the trip count of the loop with the 1492 // value it produced. 1493 Builder.CreateCall(StaticInit, 1494 {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, 1495 PUpperBound, PStride, One, Chunk}); 1496 Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound); 1497 Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound); 1498 Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound); 1499 Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One); 1500 setCanonicalLoopTripCount(CLI, TripCount); 1501 1502 // Update all uses of the induction variable except the one in the condition 1503 // block that compares it with the actual upper bound, and the increment in 1504 // the latch block. 1505 // TODO: this can eventually move to CanonicalLoopInfo or to a new 1506 // CanonicalLoopInfoUpdater interface. 1507 Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt()); 1508 Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound); 1509 IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) { 1510 auto *Instr = dyn_cast<Instruction>(U.getUser()); 1511 return !Instr || 1512 (Instr->getParent() != CLI->getCond() && 1513 Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV); 1514 }); 1515 1516 // In the "exit" block, call the "fini" function. 1517 Builder.SetInsertPoint(CLI->getExit(), 1518 CLI->getExit()->getTerminator()->getIterator()); 1519 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum}); 1520 1521 // Add the barrier if requested. 1522 if (NeedsBarrier) 1523 createBarrier(LocationDescription(Builder.saveIP(), DL), 1524 omp::Directive::OMPD_for, /* ForceSimpleCall */ false, 1525 /* CheckCancelFlag */ false); 1526 1527 InsertPointTy AfterIP = CLI->getAfterIP(); 1528 CLI->invalidate(); 1529 1530 return AfterIP; 1531 } 1532 1533 OpenMPIRBuilder::InsertPointTy 1534 OpenMPIRBuilder::applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, 1535 InsertPointTy AllocaIP, bool NeedsBarrier) { 1536 // Currently only supports static schedules. 1537 return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier); 1538 } 1539 1540 /// Returns an LLVM function to call for initializing loop bounds using OpenMP 1541 /// dynamic scheduling depending on `type`. Only i32 and i64 are supported by 1542 /// the runtime. Always interpret integers as unsigned similarly to 1543 /// CanonicalLoopInfo. 1544 static FunctionCallee 1545 getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) { 1546 unsigned Bitwidth = Ty->getIntegerBitWidth(); 1547 if (Bitwidth == 32) 1548 return OMPBuilder.getOrCreateRuntimeFunction( 1549 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u); 1550 if (Bitwidth == 64) 1551 return OMPBuilder.getOrCreateRuntimeFunction( 1552 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u); 1553 llvm_unreachable("unknown OpenMP loop iterator bitwidth"); 1554 } 1555 1556 /// Returns an LLVM function to call for updating the next loop using OpenMP 1557 /// dynamic scheduling depending on `type`. Only i32 and i64 are supported by 1558 /// the runtime. Always interpret integers as unsigned similarly to 1559 /// CanonicalLoopInfo. 1560 static FunctionCallee 1561 getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) { 1562 unsigned Bitwidth = Ty->getIntegerBitWidth(); 1563 if (Bitwidth == 32) 1564 return OMPBuilder.getOrCreateRuntimeFunction( 1565 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u); 1566 if (Bitwidth == 64) 1567 return OMPBuilder.getOrCreateRuntimeFunction( 1568 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u); 1569 llvm_unreachable("unknown OpenMP loop iterator bitwidth"); 1570 } 1571 1572 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop( 1573 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, 1574 OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) { 1575 assert(CLI->isValid() && "Requires a valid canonical loop"); 1576 1577 // Set up the source location value for OpenMP runtime. 1578 Builder.SetCurrentDebugLocation(DL); 1579 1580 Constant *SrcLocStr = getOrCreateSrcLocStr(DL); 1581 Value *SrcLoc = getOrCreateIdent(SrcLocStr); 1582 1583 // Declare useful OpenMP runtime functions. 1584 Value *IV = CLI->getIndVar(); 1585 Type *IVTy = IV->getType(); 1586 FunctionCallee DynamicInit = getKmpcForDynamicInitForType(IVTy, M, *this); 1587 FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this); 1588 1589 // Allocate space for computed loop bounds as expected by the "init" function. 1590 Builder.restoreIP(AllocaIP); 1591 Type *I32Type = Type::getInt32Ty(M.getContext()); 1592 Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter"); 1593 Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound"); 1594 Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound"); 1595 Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride"); 1596 1597 // At the end of the preheader, prepare for calling the "init" function by 1598 // storing the current loop bounds into the allocated space. A canonical loop 1599 // always iterates from 0 to trip-count with step 1. Note that "init" expects 1600 // and produces an inclusive upper bound. 1601 BasicBlock *PreHeader = CLI->getPreheader(); 1602 Builder.SetInsertPoint(PreHeader->getTerminator()); 1603 Constant *One = ConstantInt::get(IVTy, 1); 1604 Builder.CreateStore(One, PLowerBound); 1605 Value *UpperBound = CLI->getTripCount(); 1606 Builder.CreateStore(UpperBound, PUpperBound); 1607 Builder.CreateStore(One, PStride); 1608 1609 BasicBlock *Header = CLI->getHeader(); 1610 BasicBlock *Exit = CLI->getExit(); 1611 BasicBlock *Cond = CLI->getCond(); 1612 InsertPointTy AfterIP = CLI->getAfterIP(); 1613 1614 // The CLI will be "broken" in the code below, as the loop is no longer 1615 // a valid canonical loop. 1616 1617 if (!Chunk) 1618 Chunk = One; 1619 1620 Value *ThreadNum = getOrCreateThreadID(SrcLoc); 1621 1622 Constant *SchedulingType = 1623 ConstantInt::get(I32Type, static_cast<int>(SchedType)); 1624 1625 // Call the "init" function. 1626 Builder.CreateCall(DynamicInit, 1627 {SrcLoc, ThreadNum, SchedulingType, /* LowerBound */ One, 1628 UpperBound, /* step */ One, Chunk}); 1629 1630 // An outer loop around the existing one. 1631 BasicBlock *OuterCond = BasicBlock::Create( 1632 PreHeader->getContext(), Twine(PreHeader->getName()) + ".outer.cond", 1633 PreHeader->getParent()); 1634 // This needs to be 32-bit always, so can't use the IVTy Zero above. 1635 Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt()); 1636 Value *Res = 1637 Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter, 1638 PLowerBound, PUpperBound, PStride}); 1639 Constant *Zero32 = ConstantInt::get(I32Type, 0); 1640 Value *MoreWork = Builder.CreateCmp(CmpInst::ICMP_NE, Res, Zero32); 1641 Value *LowerBound = 1642 Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One, "lb"); 1643 Builder.CreateCondBr(MoreWork, Header, Exit); 1644 1645 // Change PHI-node in loop header to use outer cond rather than preheader, 1646 // and set IV to the LowerBound. 1647 Instruction *Phi = &Header->front(); 1648 auto *PI = cast<PHINode>(Phi); 1649 PI->setIncomingBlock(0, OuterCond); 1650 PI->setIncomingValue(0, LowerBound); 1651 1652 // Then set the pre-header to jump to the OuterCond 1653 Instruction *Term = PreHeader->getTerminator(); 1654 auto *Br = cast<BranchInst>(Term); 1655 Br->setSuccessor(0, OuterCond); 1656 1657 // Modify the inner condition: 1658 // * Use the UpperBound returned from the DynamicNext call. 1659 // * jump to the loop outer loop when done with one of the inner loops. 1660 Builder.SetInsertPoint(Cond, Cond->getFirstInsertionPt()); 1661 UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub"); 1662 Instruction *Comp = &*Builder.GetInsertPoint(); 1663 auto *CI = cast<CmpInst>(Comp); 1664 CI->setOperand(1, UpperBound); 1665 // Redirect the inner exit to branch to outer condition. 1666 Instruction *Branch = &Cond->back(); 1667 auto *BI = cast<BranchInst>(Branch); 1668 assert(BI->getSuccessor(1) == Exit); 1669 BI->setSuccessor(1, OuterCond); 1670 1671 // Add the barrier if requested. 1672 if (NeedsBarrier) { 1673 Builder.SetInsertPoint(&Exit->back()); 1674 createBarrier(LocationDescription(Builder.saveIP(), DL), 1675 omp::Directive::OMPD_for, /* ForceSimpleCall */ false, 1676 /* CheckCancelFlag */ false); 1677 } 1678 1679 CLI->invalidate(); 1680 return AfterIP; 1681 } 1682 1683 /// Make \p Source branch to \p Target. 1684 /// 1685 /// Handles two situations: 1686 /// * \p Source already has an unconditional branch. 1687 /// * \p Source is a degenerate block (no terminator because the BB is 1688 /// the current head of the IR construction). 1689 static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) { 1690 if (Instruction *Term = Source->getTerminator()) { 1691 auto *Br = cast<BranchInst>(Term); 1692 assert(!Br->isConditional() && 1693 "BB's terminator must be an unconditional branch (or degenerate)"); 1694 BasicBlock *Succ = Br->getSuccessor(0); 1695 Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true); 1696 Br->setSuccessor(0, Target); 1697 return; 1698 } 1699 1700 auto *NewBr = BranchInst::Create(Target, Source); 1701 NewBr->setDebugLoc(DL); 1702 } 1703 1704 /// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is, 1705 /// after this \p OldTarget will be orphaned. 1706 static void redirectAllPredecessorsTo(BasicBlock *OldTarget, 1707 BasicBlock *NewTarget, DebugLoc DL) { 1708 for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget))) 1709 redirectTo(Pred, NewTarget, DL); 1710 } 1711 1712 /// Determine which blocks in \p BBs are reachable from outside and remove the 1713 /// ones that are not reachable from the function. 1714 static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) { 1715 SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()}; 1716 auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) { 1717 for (Use &U : BB->uses()) { 1718 auto *UseInst = dyn_cast<Instruction>(U.getUser()); 1719 if (!UseInst) 1720 continue; 1721 if (BBsToErase.count(UseInst->getParent())) 1722 continue; 1723 return true; 1724 } 1725 return false; 1726 }; 1727 1728 while (true) { 1729 bool Changed = false; 1730 for (BasicBlock *BB : make_early_inc_range(BBsToErase)) { 1731 if (HasRemainingUses(BB)) { 1732 BBsToErase.erase(BB); 1733 Changed = true; 1734 } 1735 } 1736 if (!Changed) 1737 break; 1738 } 1739 1740 SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end()); 1741 DeleteDeadBlocks(BBVec); 1742 } 1743 1744 CanonicalLoopInfo * 1745 OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops, 1746 InsertPointTy ComputeIP) { 1747 assert(Loops.size() >= 1 && "At least one loop required"); 1748 size_t NumLoops = Loops.size(); 1749 1750 // Nothing to do if there is already just one loop. 1751 if (NumLoops == 1) 1752 return Loops.front(); 1753 1754 CanonicalLoopInfo *Outermost = Loops.front(); 1755 CanonicalLoopInfo *Innermost = Loops.back(); 1756 BasicBlock *OrigPreheader = Outermost->getPreheader(); 1757 BasicBlock *OrigAfter = Outermost->getAfter(); 1758 Function *F = OrigPreheader->getParent(); 1759 1760 // Setup the IRBuilder for inserting the trip count computation. 1761 Builder.SetCurrentDebugLocation(DL); 1762 if (ComputeIP.isSet()) 1763 Builder.restoreIP(ComputeIP); 1764 else 1765 Builder.restoreIP(Outermost->getPreheaderIP()); 1766 1767 // Derive the collapsed' loop trip count. 1768 // TODO: Find common/largest indvar type. 1769 Value *CollapsedTripCount = nullptr; 1770 for (CanonicalLoopInfo *L : Loops) { 1771 assert(L->isValid() && 1772 "All loops to collapse must be valid canonical loops"); 1773 Value *OrigTripCount = L->getTripCount(); 1774 if (!CollapsedTripCount) { 1775 CollapsedTripCount = OrigTripCount; 1776 continue; 1777 } 1778 1779 // TODO: Enable UndefinedSanitizer to diagnose an overflow here. 1780 CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount, 1781 {}, /*HasNUW=*/true); 1782 } 1783 1784 // Create the collapsed loop control flow. 1785 CanonicalLoopInfo *Result = 1786 createLoopSkeleton(DL, CollapsedTripCount, F, 1787 OrigPreheader->getNextNode(), OrigAfter, "collapsed"); 1788 1789 // Build the collapsed loop body code. 1790 // Start with deriving the input loop induction variables from the collapsed 1791 // one, using a divmod scheme. To preserve the original loops' order, the 1792 // innermost loop use the least significant bits. 1793 Builder.restoreIP(Result->getBodyIP()); 1794 1795 Value *Leftover = Result->getIndVar(); 1796 SmallVector<Value *> NewIndVars; 1797 NewIndVars.set_size(NumLoops); 1798 for (int i = NumLoops - 1; i >= 1; --i) { 1799 Value *OrigTripCount = Loops[i]->getTripCount(); 1800 1801 Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount); 1802 NewIndVars[i] = NewIndVar; 1803 1804 Leftover = Builder.CreateUDiv(Leftover, OrigTripCount); 1805 } 1806 // Outermost loop gets all the remaining bits. 1807 NewIndVars[0] = Leftover; 1808 1809 // Construct the loop body control flow. 1810 // We progressively construct the branch structure following in direction of 1811 // the control flow, from the leading in-between code, the loop nest body, the 1812 // trailing in-between code, and rejoining the collapsed loop's latch. 1813 // ContinueBlock and ContinuePred keep track of the source(s) of next edge. If 1814 // the ContinueBlock is set, continue with that block. If ContinuePred, use 1815 // its predecessors as sources. 1816 BasicBlock *ContinueBlock = Result->getBody(); 1817 BasicBlock *ContinuePred = nullptr; 1818 auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest, 1819 BasicBlock *NextSrc) { 1820 if (ContinueBlock) 1821 redirectTo(ContinueBlock, Dest, DL); 1822 else 1823 redirectAllPredecessorsTo(ContinuePred, Dest, DL); 1824 1825 ContinueBlock = nullptr; 1826 ContinuePred = NextSrc; 1827 }; 1828 1829 // The code before the nested loop of each level. 1830 // Because we are sinking it into the nest, it will be executed more often 1831 // that the original loop. More sophisticated schemes could keep track of what 1832 // the in-between code is and instantiate it only once per thread. 1833 for (size_t i = 0; i < NumLoops - 1; ++i) 1834 ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader()); 1835 1836 // Connect the loop nest body. 1837 ContinueWith(Innermost->getBody(), Innermost->getLatch()); 1838 1839 // The code after the nested loop at each level. 1840 for (size_t i = NumLoops - 1; i > 0; --i) 1841 ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch()); 1842 1843 // Connect the finished loop to the collapsed loop latch. 1844 ContinueWith(Result->getLatch(), nullptr); 1845 1846 // Replace the input loops with the new collapsed loop. 1847 redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL); 1848 redirectTo(Result->getAfter(), Outermost->getAfter(), DL); 1849 1850 // Replace the input loop indvars with the derived ones. 1851 for (size_t i = 0; i < NumLoops; ++i) 1852 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]); 1853 1854 // Remove unused parts of the input loops. 1855 SmallVector<BasicBlock *, 12> OldControlBBs; 1856 OldControlBBs.reserve(6 * Loops.size()); 1857 for (CanonicalLoopInfo *Loop : Loops) 1858 Loop->collectControlBlocks(OldControlBBs); 1859 removeUnusedBlocksFromParent(OldControlBBs); 1860 1861 for (CanonicalLoopInfo *L : Loops) 1862 L->invalidate(); 1863 1864 #ifndef NDEBUG 1865 Result->assertOK(); 1866 #endif 1867 return Result; 1868 } 1869 1870 std::vector<CanonicalLoopInfo *> 1871 OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops, 1872 ArrayRef<Value *> TileSizes) { 1873 assert(TileSizes.size() == Loops.size() && 1874 "Must pass as many tile sizes as there are loops"); 1875 int NumLoops = Loops.size(); 1876 assert(NumLoops >= 1 && "At least one loop to tile required"); 1877 1878 CanonicalLoopInfo *OutermostLoop = Loops.front(); 1879 CanonicalLoopInfo *InnermostLoop = Loops.back(); 1880 Function *F = OutermostLoop->getBody()->getParent(); 1881 BasicBlock *InnerEnter = InnermostLoop->getBody(); 1882 BasicBlock *InnerLatch = InnermostLoop->getLatch(); 1883 1884 // Collect original trip counts and induction variable to be accessible by 1885 // index. Also, the structure of the original loops is not preserved during 1886 // the construction of the tiled loops, so do it before we scavenge the BBs of 1887 // any original CanonicalLoopInfo. 1888 SmallVector<Value *, 4> OrigTripCounts, OrigIndVars; 1889 for (CanonicalLoopInfo *L : Loops) { 1890 assert(L->isValid() && "All input loops must be valid canonical loops"); 1891 OrigTripCounts.push_back(L->getTripCount()); 1892 OrigIndVars.push_back(L->getIndVar()); 1893 } 1894 1895 // Collect the code between loop headers. These may contain SSA definitions 1896 // that are used in the loop nest body. To be usable with in the innermost 1897 // body, these BasicBlocks will be sunk into the loop nest body. That is, 1898 // these instructions may be executed more often than before the tiling. 1899 // TODO: It would be sufficient to only sink them into body of the 1900 // corresponding tile loop. 1901 SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode; 1902 for (int i = 0; i < NumLoops - 1; ++i) { 1903 CanonicalLoopInfo *Surrounding = Loops[i]; 1904 CanonicalLoopInfo *Nested = Loops[i + 1]; 1905 1906 BasicBlock *EnterBB = Surrounding->getBody(); 1907 BasicBlock *ExitBB = Nested->getHeader(); 1908 InbetweenCode.emplace_back(EnterBB, ExitBB); 1909 } 1910 1911 // Compute the trip counts of the floor loops. 1912 Builder.SetCurrentDebugLocation(DL); 1913 Builder.restoreIP(OutermostLoop->getPreheaderIP()); 1914 SmallVector<Value *, 4> FloorCount, FloorRems; 1915 for (int i = 0; i < NumLoops; ++i) { 1916 Value *TileSize = TileSizes[i]; 1917 Value *OrigTripCount = OrigTripCounts[i]; 1918 Type *IVType = OrigTripCount->getType(); 1919 1920 Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize); 1921 Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize); 1922 1923 // 0 if tripcount divides the tilesize, 1 otherwise. 1924 // 1 means we need an additional iteration for a partial tile. 1925 // 1926 // Unfortunately we cannot just use the roundup-formula 1927 // (tripcount + tilesize - 1)/tilesize 1928 // because the summation might overflow. We do not want introduce undefined 1929 // behavior when the untiled loop nest did not. 1930 Value *FloorTripOverflow = 1931 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0)); 1932 1933 FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType); 1934 FloorTripCount = 1935 Builder.CreateAdd(FloorTripCount, FloorTripOverflow, 1936 "omp_floor" + Twine(i) + ".tripcount", true); 1937 1938 // Remember some values for later use. 1939 FloorCount.push_back(FloorTripCount); 1940 FloorRems.push_back(FloorTripRem); 1941 } 1942 1943 // Generate the new loop nest, from the outermost to the innermost. 1944 std::vector<CanonicalLoopInfo *> Result; 1945 Result.reserve(NumLoops * 2); 1946 1947 // The basic block of the surrounding loop that enters the nest generated 1948 // loop. 1949 BasicBlock *Enter = OutermostLoop->getPreheader(); 1950 1951 // The basic block of the surrounding loop where the inner code should 1952 // continue. 1953 BasicBlock *Continue = OutermostLoop->getAfter(); 1954 1955 // Where the next loop basic block should be inserted. 1956 BasicBlock *OutroInsertBefore = InnermostLoop->getExit(); 1957 1958 auto EmbeddNewLoop = 1959 [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore]( 1960 Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * { 1961 CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton( 1962 DL, TripCount, F, InnerEnter, OutroInsertBefore, Name); 1963 redirectTo(Enter, EmbeddedLoop->getPreheader(), DL); 1964 redirectTo(EmbeddedLoop->getAfter(), Continue, DL); 1965 1966 // Setup the position where the next embedded loop connects to this loop. 1967 Enter = EmbeddedLoop->getBody(); 1968 Continue = EmbeddedLoop->getLatch(); 1969 OutroInsertBefore = EmbeddedLoop->getLatch(); 1970 return EmbeddedLoop; 1971 }; 1972 1973 auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts, 1974 const Twine &NameBase) { 1975 for (auto P : enumerate(TripCounts)) { 1976 CanonicalLoopInfo *EmbeddedLoop = 1977 EmbeddNewLoop(P.value(), NameBase + Twine(P.index())); 1978 Result.push_back(EmbeddedLoop); 1979 } 1980 }; 1981 1982 EmbeddNewLoops(FloorCount, "floor"); 1983 1984 // Within the innermost floor loop, emit the code that computes the tile 1985 // sizes. 1986 Builder.SetInsertPoint(Enter->getTerminator()); 1987 SmallVector<Value *, 4> TileCounts; 1988 for (int i = 0; i < NumLoops; ++i) { 1989 CanonicalLoopInfo *FloorLoop = Result[i]; 1990 Value *TileSize = TileSizes[i]; 1991 1992 Value *FloorIsEpilogue = 1993 Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]); 1994 Value *TileTripCount = 1995 Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize); 1996 1997 TileCounts.push_back(TileTripCount); 1998 } 1999 2000 // Create the tile loops. 2001 EmbeddNewLoops(TileCounts, "tile"); 2002 2003 // Insert the inbetween code into the body. 2004 BasicBlock *BodyEnter = Enter; 2005 BasicBlock *BodyEntered = nullptr; 2006 for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) { 2007 BasicBlock *EnterBB = P.first; 2008 BasicBlock *ExitBB = P.second; 2009 2010 if (BodyEnter) 2011 redirectTo(BodyEnter, EnterBB, DL); 2012 else 2013 redirectAllPredecessorsTo(BodyEntered, EnterBB, DL); 2014 2015 BodyEnter = nullptr; 2016 BodyEntered = ExitBB; 2017 } 2018 2019 // Append the original loop nest body into the generated loop nest body. 2020 if (BodyEnter) 2021 redirectTo(BodyEnter, InnerEnter, DL); 2022 else 2023 redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL); 2024 redirectAllPredecessorsTo(InnerLatch, Continue, DL); 2025 2026 // Replace the original induction variable with an induction variable computed 2027 // from the tile and floor induction variables. 2028 Builder.restoreIP(Result.back()->getBodyIP()); 2029 for (int i = 0; i < NumLoops; ++i) { 2030 CanonicalLoopInfo *FloorLoop = Result[i]; 2031 CanonicalLoopInfo *TileLoop = Result[NumLoops + i]; 2032 Value *OrigIndVar = OrigIndVars[i]; 2033 Value *Size = TileSizes[i]; 2034 2035 Value *Scale = 2036 Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true); 2037 Value *Shift = 2038 Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true); 2039 OrigIndVar->replaceAllUsesWith(Shift); 2040 } 2041 2042 // Remove unused parts of the original loops. 2043 SmallVector<BasicBlock *, 12> OldControlBBs; 2044 OldControlBBs.reserve(6 * Loops.size()); 2045 for (CanonicalLoopInfo *Loop : Loops) 2046 Loop->collectControlBlocks(OldControlBBs); 2047 removeUnusedBlocksFromParent(OldControlBBs); 2048 2049 for (CanonicalLoopInfo *L : Loops) 2050 L->invalidate(); 2051 2052 #ifndef NDEBUG 2053 for (CanonicalLoopInfo *GenL : Result) 2054 GenL->assertOK(); 2055 #endif 2056 return Result; 2057 } 2058 2059 OpenMPIRBuilder::InsertPointTy 2060 OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc, 2061 llvm::Value *BufSize, llvm::Value *CpyBuf, 2062 llvm::Value *CpyFn, llvm::Value *DidIt) { 2063 if (!updateToLocation(Loc)) 2064 return Loc.IP; 2065 2066 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 2067 Value *Ident = getOrCreateIdent(SrcLocStr); 2068 Value *ThreadId = getOrCreateThreadID(Ident); 2069 2070 llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt); 2071 2072 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD}; 2073 2074 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate); 2075 Builder.CreateCall(Fn, Args); 2076 2077 return Builder.saveIP(); 2078 } 2079 2080 OpenMPIRBuilder::InsertPointTy 2081 OpenMPIRBuilder::createSingle(const LocationDescription &Loc, 2082 BodyGenCallbackTy BodyGenCB, 2083 FinalizeCallbackTy FiniCB, llvm::Value *DidIt) { 2084 2085 if (!updateToLocation(Loc)) 2086 return Loc.IP; 2087 2088 // If needed (i.e. not null), initialize `DidIt` with 0 2089 if (DidIt) { 2090 Builder.CreateStore(Builder.getInt32(0), DidIt); 2091 } 2092 2093 Directive OMPD = Directive::OMPD_single; 2094 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 2095 Value *Ident = getOrCreateIdent(SrcLocStr); 2096 Value *ThreadId = getOrCreateThreadID(Ident); 2097 Value *Args[] = {Ident, ThreadId}; 2098 2099 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single); 2100 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args); 2101 2102 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single); 2103 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); 2104 2105 // generates the following: 2106 // if (__kmpc_single()) { 2107 // .... single region ... 2108 // __kmpc_end_single 2109 // } 2110 2111 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, 2112 /*Conditional*/ true, /*hasFinalize*/ true); 2113 } 2114 2115 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical( 2116 const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, 2117 FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) { 2118 2119 if (!updateToLocation(Loc)) 2120 return Loc.IP; 2121 2122 Directive OMPD = Directive::OMPD_critical; 2123 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 2124 Value *Ident = getOrCreateIdent(SrcLocStr); 2125 Value *ThreadId = getOrCreateThreadID(Ident); 2126 Value *LockVar = getOMPCriticalRegionLock(CriticalName); 2127 Value *Args[] = {Ident, ThreadId, LockVar}; 2128 2129 SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), std::end(Args)); 2130 Function *RTFn = nullptr; 2131 if (HintInst) { 2132 // Add Hint to entry Args and create call 2133 EnterArgs.push_back(HintInst); 2134 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint); 2135 } else { 2136 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical); 2137 } 2138 Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs); 2139 2140 Function *ExitRTLFn = 2141 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical); 2142 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); 2143 2144 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, 2145 /*Conditional*/ false, /*hasFinalize*/ true); 2146 } 2147 2148 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( 2149 Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, 2150 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional, 2151 bool HasFinalize, bool IsCancellable) { 2152 2153 if (HasFinalize) 2154 FinalizationStack.push_back({FiniCB, OMPD, IsCancellable}); 2155 2156 // Create inlined region's entry and body blocks, in preparation 2157 // for conditional creation 2158 BasicBlock *EntryBB = Builder.GetInsertBlock(); 2159 Instruction *SplitPos = EntryBB->getTerminator(); 2160 if (!isa_and_nonnull<BranchInst>(SplitPos)) 2161 SplitPos = new UnreachableInst(Builder.getContext(), EntryBB); 2162 BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end"); 2163 BasicBlock *FiniBB = 2164 EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize"); 2165 2166 Builder.SetInsertPoint(EntryBB->getTerminator()); 2167 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional); 2168 2169 // generate body 2170 BodyGenCB(/* AllocaIP */ InsertPointTy(), 2171 /* CodeGenIP */ Builder.saveIP(), *FiniBB); 2172 2173 // If we didn't emit a branch to FiniBB during body generation, it means 2174 // FiniBB is unreachable (e.g. while(1);). stop generating all the 2175 // unreachable blocks, and remove anything we are not going to use. 2176 auto SkipEmittingRegion = FiniBB->hasNPredecessors(0); 2177 if (SkipEmittingRegion) { 2178 FiniBB->eraseFromParent(); 2179 ExitCall->eraseFromParent(); 2180 // Discard finalization if we have it. 2181 if (HasFinalize) { 2182 assert(!FinalizationStack.empty() && 2183 "Unexpected finalization stack state!"); 2184 FinalizationStack.pop_back(); 2185 } 2186 } else { 2187 // emit exit call and do any needed finalization. 2188 auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt()); 2189 assert(FiniBB->getTerminator()->getNumSuccessors() == 1 && 2190 FiniBB->getTerminator()->getSuccessor(0) == ExitBB && 2191 "Unexpected control flow graph state!!"); 2192 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize); 2193 assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB && 2194 "Unexpected Control Flow State!"); 2195 MergeBlockIntoPredecessor(FiniBB); 2196 } 2197 2198 // If we are skipping the region of a non conditional, remove the exit 2199 // block, and clear the builder's insertion point. 2200 assert(SplitPos->getParent() == ExitBB && 2201 "Unexpected Insertion point location!"); 2202 if (!Conditional && SkipEmittingRegion) { 2203 ExitBB->eraseFromParent(); 2204 Builder.ClearInsertionPoint(); 2205 } else { 2206 auto merged = MergeBlockIntoPredecessor(ExitBB); 2207 BasicBlock *ExitPredBB = SplitPos->getParent(); 2208 auto InsertBB = merged ? ExitPredBB : ExitBB; 2209 if (!isa_and_nonnull<BranchInst>(SplitPos)) 2210 SplitPos->eraseFromParent(); 2211 Builder.SetInsertPoint(InsertBB); 2212 } 2213 2214 return Builder.saveIP(); 2215 } 2216 2217 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry( 2218 Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) { 2219 // if nothing to do, Return current insertion point. 2220 if (!Conditional || !EntryCall) 2221 return Builder.saveIP(); 2222 2223 BasicBlock *EntryBB = Builder.GetInsertBlock(); 2224 Value *CallBool = Builder.CreateIsNotNull(EntryCall); 2225 auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body"); 2226 auto *UI = new UnreachableInst(Builder.getContext(), ThenBB); 2227 2228 // Emit thenBB and set the Builder's insertion point there for 2229 // body generation next. Place the block after the current block. 2230 Function *CurFn = EntryBB->getParent(); 2231 CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB); 2232 2233 // Move Entry branch to end of ThenBB, and replace with conditional 2234 // branch (If-stmt) 2235 Instruction *EntryBBTI = EntryBB->getTerminator(); 2236 Builder.CreateCondBr(CallBool, ThenBB, ExitBB); 2237 EntryBBTI->removeFromParent(); 2238 Builder.SetInsertPoint(UI); 2239 Builder.Insert(EntryBBTI); 2240 UI->eraseFromParent(); 2241 Builder.SetInsertPoint(ThenBB->getTerminator()); 2242 2243 // return an insertion point to ExitBB. 2244 return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt()); 2245 } 2246 2247 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit( 2248 omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall, 2249 bool HasFinalize) { 2250 2251 Builder.restoreIP(FinIP); 2252 2253 // If there is finalization to do, emit it before the exit call 2254 if (HasFinalize) { 2255 assert(!FinalizationStack.empty() && 2256 "Unexpected finalization stack state!"); 2257 2258 FinalizationInfo Fi = FinalizationStack.pop_back_val(); 2259 assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!"); 2260 2261 Fi.FiniCB(FinIP); 2262 2263 BasicBlock *FiniBB = FinIP.getBlock(); 2264 Instruction *FiniBBTI = FiniBB->getTerminator(); 2265 2266 // set Builder IP for call creation 2267 Builder.SetInsertPoint(FiniBBTI); 2268 } 2269 2270 if (!ExitCall) 2271 return Builder.saveIP(); 2272 2273 // place the Exitcall as last instruction before Finalization block terminator 2274 ExitCall->removeFromParent(); 2275 Builder.Insert(ExitCall); 2276 2277 return IRBuilder<>::InsertPoint(ExitCall->getParent(), 2278 ExitCall->getIterator()); 2279 } 2280 2281 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks( 2282 InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, 2283 llvm::IntegerType *IntPtrTy, bool BranchtoEnd) { 2284 if (!IP.isSet()) 2285 return IP; 2286 2287 IRBuilder<>::InsertPointGuard IPG(Builder); 2288 2289 // creates the following CFG structure 2290 // OMP_Entry : (MasterAddr != PrivateAddr)? 2291 // F T 2292 // | \ 2293 // | copin.not.master 2294 // | / 2295 // v / 2296 // copyin.not.master.end 2297 // | 2298 // v 2299 // OMP.Entry.Next 2300 2301 BasicBlock *OMP_Entry = IP.getBlock(); 2302 Function *CurFn = OMP_Entry->getParent(); 2303 BasicBlock *CopyBegin = 2304 BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn); 2305 BasicBlock *CopyEnd = nullptr; 2306 2307 // If entry block is terminated, split to preserve the branch to following 2308 // basic block (i.e. OMP.Entry.Next), otherwise, leave everything as is. 2309 if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) { 2310 CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(), 2311 "copyin.not.master.end"); 2312 OMP_Entry->getTerminator()->eraseFromParent(); 2313 } else { 2314 CopyEnd = 2315 BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn); 2316 } 2317 2318 Builder.SetInsertPoint(OMP_Entry); 2319 Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy); 2320 Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy); 2321 Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr); 2322 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd); 2323 2324 Builder.SetInsertPoint(CopyBegin); 2325 if (BranchtoEnd) 2326 Builder.SetInsertPoint(Builder.CreateBr(CopyEnd)); 2327 2328 return Builder.saveIP(); 2329 } 2330 2331 CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc, 2332 Value *Size, Value *Allocator, 2333 std::string Name) { 2334 IRBuilder<>::InsertPointGuard IPG(Builder); 2335 Builder.restoreIP(Loc.IP); 2336 2337 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 2338 Value *Ident = getOrCreateIdent(SrcLocStr); 2339 Value *ThreadId = getOrCreateThreadID(Ident); 2340 Value *Args[] = {ThreadId, Size, Allocator}; 2341 2342 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc); 2343 2344 return Builder.CreateCall(Fn, Args, Name); 2345 } 2346 2347 CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc, 2348 Value *Addr, Value *Allocator, 2349 std::string Name) { 2350 IRBuilder<>::InsertPointGuard IPG(Builder); 2351 Builder.restoreIP(Loc.IP); 2352 2353 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 2354 Value *Ident = getOrCreateIdent(SrcLocStr); 2355 Value *ThreadId = getOrCreateThreadID(Ident); 2356 Value *Args[] = {ThreadId, Addr, Allocator}; 2357 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free); 2358 return Builder.CreateCall(Fn, Args, Name); 2359 } 2360 2361 CallInst *OpenMPIRBuilder::createCachedThreadPrivate( 2362 const LocationDescription &Loc, llvm::Value *Pointer, 2363 llvm::ConstantInt *Size, const llvm::Twine &Name) { 2364 IRBuilder<>::InsertPointGuard IPG(Builder); 2365 Builder.restoreIP(Loc.IP); 2366 2367 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 2368 Value *Ident = getOrCreateIdent(SrcLocStr); 2369 Value *ThreadId = getOrCreateThreadID(Ident); 2370 Constant *ThreadPrivateCache = 2371 getOrCreateOMPInternalVariable(Int8PtrPtr, Name); 2372 llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache}; 2373 2374 Function *Fn = 2375 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached); 2376 2377 return Builder.CreateCall(Fn, Args); 2378 } 2379 2380 OpenMPIRBuilder::InsertPointTy 2381 OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime) { 2382 if (!updateToLocation(Loc)) 2383 return Loc.IP; 2384 2385 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 2386 Value *Ident = getOrCreateIdent(SrcLocStr); 2387 ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD); 2388 ConstantInt *UseGenericStateMachine = 2389 ConstantInt::getBool(Int32->getContext(), !IsSPMD); 2390 ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime); 2391 2392 Function *Fn = getOrCreateRuntimeFunctionPtr( 2393 omp::RuntimeFunction::OMPRTL___kmpc_target_init); 2394 2395 CallInst *ThreadKind = 2396 Builder.CreateCall(Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal}); 2397 2398 Value *ExecUserCode = Builder.CreateICmpEQ( 2399 ThreadKind, ConstantInt::get(ThreadKind->getType(), -1), "exec_user_code"); 2400 2401 // ThreadKind = __kmpc_target_init(...) 2402 // if (ThreadKind == -1) 2403 // user_code 2404 // else 2405 // return; 2406 2407 auto *UI = Builder.CreateUnreachable(); 2408 BasicBlock *CheckBB = UI->getParent(); 2409 BasicBlock *UserCodeEntryBB = CheckBB->splitBasicBlock(UI, "user_code.entry"); 2410 2411 BasicBlock *WorkerExitBB = BasicBlock::Create( 2412 CheckBB->getContext(), "worker.exit", CheckBB->getParent()); 2413 Builder.SetInsertPoint(WorkerExitBB); 2414 Builder.CreateRetVoid(); 2415 2416 auto *CheckBBTI = CheckBB->getTerminator(); 2417 Builder.SetInsertPoint(CheckBBTI); 2418 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB); 2419 2420 CheckBBTI->eraseFromParent(); 2421 UI->eraseFromParent(); 2422 2423 // Continue in the "user_code" block, see diagram above and in 2424 // openmp/libomptarget/deviceRTLs/common/include/target.h . 2425 return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt()); 2426 } 2427 2428 void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc, 2429 bool IsSPMD, bool RequiresFullRuntime) { 2430 if (!updateToLocation(Loc)) 2431 return; 2432 2433 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 2434 Value *Ident = getOrCreateIdent(SrcLocStr); 2435 ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD); 2436 ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime); 2437 2438 Function *Fn = getOrCreateRuntimeFunctionPtr( 2439 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit); 2440 2441 Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal}); 2442 } 2443 2444 std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts, 2445 StringRef FirstSeparator, 2446 StringRef Separator) { 2447 SmallString<128> Buffer; 2448 llvm::raw_svector_ostream OS(Buffer); 2449 StringRef Sep = FirstSeparator; 2450 for (StringRef Part : Parts) { 2451 OS << Sep << Part; 2452 Sep = Separator; 2453 } 2454 return OS.str().str(); 2455 } 2456 2457 Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable( 2458 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2459 // TODO: Replace the twine arg with stringref to get rid of the conversion 2460 // logic. However This is taken from current implementation in clang as is. 2461 // Since this method is used in many places exclusively for OMP internal use 2462 // we will keep it as is for temporarily until we move all users to the 2463 // builder and then, if possible, fix it everywhere in one go. 2464 SmallString<256> Buffer; 2465 llvm::raw_svector_ostream Out(Buffer); 2466 Out << Name; 2467 StringRef RuntimeName = Out.str(); 2468 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2469 if (Elem.second) { 2470 assert(Elem.second->getType()->getPointerElementType() == Ty && 2471 "OMP internal variable has different type than requested"); 2472 } else { 2473 // TODO: investigate the appropriate linkage type used for the global 2474 // variable for possibly changing that to internal or private, or maybe 2475 // create different versions of the function for different OMP internal 2476 // variables. 2477 Elem.second = new llvm::GlobalVariable( 2478 M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage, 2479 llvm::Constant::getNullValue(Ty), Elem.first(), 2480 /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, 2481 AddressSpace); 2482 } 2483 2484 return Elem.second; 2485 } 2486 2487 Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) { 2488 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2489 std::string Name = getNameWithSeparators({Prefix, "var"}, ".", "."); 2490 return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name); 2491 } 2492 2493 GlobalVariable * 2494 OpenMPIRBuilder::createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings, 2495 std::string VarName) { 2496 llvm::Constant *MaptypesArrayInit = 2497 llvm::ConstantDataArray::get(M.getContext(), Mappings); 2498 auto *MaptypesArrayGlobal = new llvm::GlobalVariable( 2499 M, MaptypesArrayInit->getType(), 2500 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MaptypesArrayInit, 2501 VarName); 2502 MaptypesArrayGlobal->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 2503 return MaptypesArrayGlobal; 2504 } 2505 2506 void OpenMPIRBuilder::createMapperAllocas(const LocationDescription &Loc, 2507 InsertPointTy AllocaIP, 2508 unsigned NumOperands, 2509 struct MapperAllocas &MapperAllocas) { 2510 if (!updateToLocation(Loc)) 2511 return; 2512 2513 auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands); 2514 auto *ArrI64Ty = ArrayType::get(Int64, NumOperands); 2515 Builder.restoreIP(AllocaIP); 2516 AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI8PtrTy); 2517 AllocaInst *Args = Builder.CreateAlloca(ArrI8PtrTy); 2518 AllocaInst *ArgSizes = Builder.CreateAlloca(ArrI64Ty); 2519 Builder.restoreIP(Loc.IP); 2520 MapperAllocas.ArgsBase = ArgsBase; 2521 MapperAllocas.Args = Args; 2522 MapperAllocas.ArgSizes = ArgSizes; 2523 } 2524 2525 void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc, 2526 Function *MapperFunc, Value *SrcLocInfo, 2527 Value *MaptypesArg, Value *MapnamesArg, 2528 struct MapperAllocas &MapperAllocas, 2529 int64_t DeviceID, unsigned NumOperands) { 2530 if (!updateToLocation(Loc)) 2531 return; 2532 2533 auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands); 2534 auto *ArrI64Ty = ArrayType::get(Int64, NumOperands); 2535 Value *ArgsBaseGEP = 2536 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase, 2537 {Builder.getInt32(0), Builder.getInt32(0)}); 2538 Value *ArgsGEP = 2539 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args, 2540 {Builder.getInt32(0), Builder.getInt32(0)}); 2541 Value *ArgSizesGEP = 2542 Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes, 2543 {Builder.getInt32(0), Builder.getInt32(0)}); 2544 Value *NullPtr = Constant::getNullValue(Int8Ptr->getPointerTo()); 2545 Builder.CreateCall(MapperFunc, 2546 {SrcLocInfo, Builder.getInt64(DeviceID), 2547 Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP, 2548 ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr}); 2549 } 2550 2551 bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic( 2552 const LocationDescription &Loc, llvm::AtomicOrdering AO, AtomicKind AK) { 2553 assert(!(AO == AtomicOrdering::NotAtomic || 2554 AO == llvm::AtomicOrdering::Unordered) && 2555 "Unexpected Atomic Ordering."); 2556 2557 bool Flush = false; 2558 llvm::AtomicOrdering FlushAO = AtomicOrdering::Monotonic; 2559 2560 switch (AK) { 2561 case Read: 2562 if (AO == AtomicOrdering::Acquire || AO == AtomicOrdering::AcquireRelease || 2563 AO == AtomicOrdering::SequentiallyConsistent) { 2564 FlushAO = AtomicOrdering::Acquire; 2565 Flush = true; 2566 } 2567 break; 2568 case Write: 2569 case Update: 2570 if (AO == AtomicOrdering::Release || AO == AtomicOrdering::AcquireRelease || 2571 AO == AtomicOrdering::SequentiallyConsistent) { 2572 FlushAO = AtomicOrdering::Release; 2573 Flush = true; 2574 } 2575 break; 2576 case Capture: 2577 switch (AO) { 2578 case AtomicOrdering::Acquire: 2579 FlushAO = AtomicOrdering::Acquire; 2580 Flush = true; 2581 break; 2582 case AtomicOrdering::Release: 2583 FlushAO = AtomicOrdering::Release; 2584 Flush = true; 2585 break; 2586 case AtomicOrdering::AcquireRelease: 2587 case AtomicOrdering::SequentiallyConsistent: 2588 FlushAO = AtomicOrdering::AcquireRelease; 2589 Flush = true; 2590 break; 2591 default: 2592 // do nothing - leave silently. 2593 break; 2594 } 2595 } 2596 2597 if (Flush) { 2598 // Currently Flush RT call still doesn't take memory_ordering, so for when 2599 // that happens, this tries to do the resolution of which atomic ordering 2600 // to use with but issue the flush call 2601 // TODO: pass `FlushAO` after memory ordering support is added 2602 (void)FlushAO; 2603 emitFlush(Loc); 2604 } 2605 2606 // for AO == AtomicOrdering::Monotonic and all other case combinations 2607 // do nothing 2608 return Flush; 2609 } 2610 2611 OpenMPIRBuilder::InsertPointTy 2612 OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc, 2613 AtomicOpValue &X, AtomicOpValue &V, 2614 AtomicOrdering AO) { 2615 if (!updateToLocation(Loc)) 2616 return Loc.IP; 2617 2618 Type *XTy = X.Var->getType(); 2619 assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory"); 2620 Type *XElemTy = XTy->getPointerElementType(); 2621 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || 2622 XElemTy->isPointerTy()) && 2623 "OMP atomic read expected a scalar type"); 2624 2625 Value *XRead = nullptr; 2626 2627 if (XElemTy->isIntegerTy()) { 2628 LoadInst *XLD = 2629 Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read"); 2630 XLD->setAtomic(AO); 2631 XRead = cast<Value>(XLD); 2632 } else { 2633 // We need to bitcast and perform atomic op as integer 2634 unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace(); 2635 IntegerType *IntCastTy = 2636 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits()); 2637 Value *XBCast = Builder.CreateBitCast( 2638 X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.src.int.cast"); 2639 LoadInst *XLoad = 2640 Builder.CreateLoad(IntCastTy, XBCast, X.IsVolatile, "omp.atomic.load"); 2641 XLoad->setAtomic(AO); 2642 if (XElemTy->isFloatingPointTy()) { 2643 XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast"); 2644 } else { 2645 XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast"); 2646 } 2647 } 2648 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read); 2649 Builder.CreateStore(XRead, V.Var, V.IsVolatile); 2650 return Builder.saveIP(); 2651 } 2652 2653 OpenMPIRBuilder::InsertPointTy 2654 OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc, 2655 AtomicOpValue &X, Value *Expr, 2656 AtomicOrdering AO) { 2657 if (!updateToLocation(Loc)) 2658 return Loc.IP; 2659 2660 Type *XTy = X.Var->getType(); 2661 assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory"); 2662 Type *XElemTy = XTy->getPointerElementType(); 2663 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || 2664 XElemTy->isPointerTy()) && 2665 "OMP atomic write expected a scalar type"); 2666 2667 if (XElemTy->isIntegerTy()) { 2668 StoreInst *XSt = Builder.CreateStore(Expr, X.Var, X.IsVolatile); 2669 XSt->setAtomic(AO); 2670 } else { 2671 // We need to bitcast and perform atomic op as integers 2672 unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace(); 2673 IntegerType *IntCastTy = 2674 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits()); 2675 Value *XBCast = Builder.CreateBitCast( 2676 X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.dst.int.cast"); 2677 Value *ExprCast = 2678 Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast"); 2679 StoreInst *XSt = Builder.CreateStore(ExprCast, XBCast, X.IsVolatile); 2680 XSt->setAtomic(AO); 2681 } 2682 2683 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write); 2684 return Builder.saveIP(); 2685 } 2686 2687 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate( 2688 const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, 2689 Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, 2690 AtomicUpdateCallbackTy &UpdateOp, bool IsXLHSInRHSPart) { 2691 if (!updateToLocation(Loc)) 2692 return Loc.IP; 2693 2694 LLVM_DEBUG({ 2695 Type *XTy = X.Var->getType(); 2696 assert(XTy->isPointerTy() && 2697 "OMP Atomic expects a pointer to target memory"); 2698 Type *XElemTy = XTy->getPointerElementType(); 2699 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || 2700 XElemTy->isPointerTy()) && 2701 "OMP atomic update expected a scalar type"); 2702 assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) && 2703 (RMWOp != AtomicRMWInst::UMax) && (RMWOp != AtomicRMWInst::UMin) && 2704 "OpenMP atomic does not support LT or GT operations"); 2705 }); 2706 2707 emitAtomicUpdate(AllocIP, X.Var, Expr, AO, RMWOp, UpdateOp, X.IsVolatile, 2708 IsXLHSInRHSPart); 2709 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update); 2710 return Builder.saveIP(); 2711 } 2712 2713 Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2, 2714 AtomicRMWInst::BinOp RMWOp) { 2715 switch (RMWOp) { 2716 case AtomicRMWInst::Add: 2717 return Builder.CreateAdd(Src1, Src2); 2718 case AtomicRMWInst::Sub: 2719 return Builder.CreateSub(Src1, Src2); 2720 case AtomicRMWInst::And: 2721 return Builder.CreateAnd(Src1, Src2); 2722 case AtomicRMWInst::Nand: 2723 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2)); 2724 case AtomicRMWInst::Or: 2725 return Builder.CreateOr(Src1, Src2); 2726 case AtomicRMWInst::Xor: 2727 return Builder.CreateXor(Src1, Src2); 2728 case AtomicRMWInst::Xchg: 2729 case AtomicRMWInst::FAdd: 2730 case AtomicRMWInst::FSub: 2731 case AtomicRMWInst::BAD_BINOP: 2732 case AtomicRMWInst::Max: 2733 case AtomicRMWInst::Min: 2734 case AtomicRMWInst::UMax: 2735 case AtomicRMWInst::UMin: 2736 llvm_unreachable("Unsupported atomic update operation"); 2737 } 2738 llvm_unreachable("Unsupported atomic update operation"); 2739 } 2740 2741 std::pair<Value *, Value *> 2742 OpenMPIRBuilder::emitAtomicUpdate(Instruction *AllocIP, Value *X, Value *Expr, 2743 AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, 2744 AtomicUpdateCallbackTy &UpdateOp, 2745 bool VolatileX, bool IsXLHSInRHSPart) { 2746 Type *XElemTy = X->getType()->getPointerElementType(); 2747 2748 bool DoCmpExch = 2749 ((RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd)) || 2750 (RMWOp == AtomicRMWInst::FSub) || 2751 (RMWOp == AtomicRMWInst::Sub && !IsXLHSInRHSPart); 2752 2753 std::pair<Value *, Value *> Res; 2754 if (XElemTy->isIntegerTy() && !DoCmpExch) { 2755 Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO); 2756 // not needed except in case of postfix captures. Generate anyway for 2757 // consistency with the else part. Will be removed with any DCE pass. 2758 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp); 2759 } else { 2760 unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace(); 2761 IntegerType *IntCastTy = 2762 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits()); 2763 Value *XBCast = 2764 Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace)); 2765 LoadInst *OldVal = 2766 Builder.CreateLoad(IntCastTy, XBCast, X->getName() + ".atomic.load"); 2767 OldVal->setAtomic(AO); 2768 // CurBB 2769 // | /---\ 2770 // ContBB | 2771 // | \---/ 2772 // ExitBB 2773 BasicBlock *CurBB = Builder.GetInsertBlock(); 2774 Instruction *CurBBTI = CurBB->getTerminator(); 2775 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable(); 2776 BasicBlock *ExitBB = 2777 CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit"); 2778 BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(), 2779 X->getName() + ".atomic.cont"); 2780 ContBB->getTerminator()->eraseFromParent(); 2781 Builder.SetInsertPoint(ContBB); 2782 llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2); 2783 PHI->addIncoming(OldVal, CurBB); 2784 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy); 2785 NewAtomicAddr->setName(X->getName() + "x.new.val"); 2786 NewAtomicAddr->moveBefore(AllocIP); 2787 IntegerType *NewAtomicCastTy = 2788 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits()); 2789 bool IsIntTy = XElemTy->isIntegerTy(); 2790 Value *NewAtomicIntAddr = 2791 (IsIntTy) 2792 ? NewAtomicAddr 2793 : Builder.CreateBitCast(NewAtomicAddr, 2794 NewAtomicCastTy->getPointerTo(Addrspace)); 2795 Value *OldExprVal = PHI; 2796 if (!IsIntTy) { 2797 if (XElemTy->isFloatingPointTy()) { 2798 OldExprVal = Builder.CreateBitCast(PHI, XElemTy, 2799 X->getName() + ".atomic.fltCast"); 2800 } else { 2801 OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy, 2802 X->getName() + ".atomic.ptrCast"); 2803 } 2804 } 2805 2806 Value *Upd = UpdateOp(OldExprVal, Builder); 2807 Builder.CreateStore(Upd, NewAtomicAddr); 2808 LoadInst *DesiredVal = Builder.CreateLoad(XElemTy, NewAtomicIntAddr); 2809 Value *XAddr = 2810 (IsIntTy) 2811 ? X 2812 : Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace)); 2813 AtomicOrdering Failure = 2814 llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO); 2815 AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg( 2816 XAddr, OldExprVal, DesiredVal, llvm::MaybeAlign(), AO, Failure); 2817 Result->setVolatile(VolatileX); 2818 Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0); 2819 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1); 2820 PHI->addIncoming(PreviousVal, Builder.GetInsertBlock()); 2821 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB); 2822 2823 Res.first = OldExprVal; 2824 Res.second = Upd; 2825 2826 // set Insertion point in exit block 2827 if (UnreachableInst *ExitTI = 2828 dyn_cast<UnreachableInst>(ExitBB->getTerminator())) { 2829 CurBBTI->eraseFromParent(); 2830 Builder.SetInsertPoint(ExitBB); 2831 } else { 2832 Builder.SetInsertPoint(ExitTI); 2833 } 2834 } 2835 2836 return Res; 2837 } 2838 2839 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture( 2840 const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, 2841 AtomicOpValue &V, Value *Expr, AtomicOrdering AO, 2842 AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, 2843 bool UpdateExpr, bool IsPostfixUpdate, bool IsXLHSInRHSPart) { 2844 if (!updateToLocation(Loc)) 2845 return Loc.IP; 2846 2847 LLVM_DEBUG({ 2848 Type *XTy = X.Var->getType(); 2849 assert(XTy->isPointerTy() && 2850 "OMP Atomic expects a pointer to target memory"); 2851 Type *XElemTy = XTy->getPointerElementType(); 2852 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || 2853 XElemTy->isPointerTy()) && 2854 "OMP atomic capture expected a scalar type"); 2855 assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) && 2856 "OpenMP atomic does not support LT or GT operations"); 2857 }); 2858 2859 // If UpdateExpr is 'x' updated with some `expr` not based on 'x', 2860 // 'x' is simply atomically rewritten with 'expr'. 2861 AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg); 2862 std::pair<Value *, Value *> Result = 2863 emitAtomicUpdate(AllocIP, X.Var, Expr, AO, AtomicOp, UpdateOp, 2864 X.IsVolatile, IsXLHSInRHSPart); 2865 2866 Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second); 2867 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile); 2868 2869 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture); 2870 return Builder.saveIP(); 2871 } 2872 2873 GlobalVariable * 2874 OpenMPIRBuilder::createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names, 2875 std::string VarName) { 2876 llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get( 2877 llvm::ArrayType::get( 2878 llvm::Type::getInt8Ty(M.getContext())->getPointerTo(), Names.size()), 2879 Names); 2880 auto *MapNamesArrayGlobal = new llvm::GlobalVariable( 2881 M, MapNamesArrayInit->getType(), 2882 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapNamesArrayInit, 2883 VarName); 2884 return MapNamesArrayGlobal; 2885 } 2886 2887 // Create all simple and struct types exposed by the runtime and remember 2888 // the llvm::PointerTypes of them for easy access later. 2889 void OpenMPIRBuilder::initializeTypes(Module &M) { 2890 LLVMContext &Ctx = M.getContext(); 2891 StructType *T; 2892 #define OMP_TYPE(VarName, InitValue) VarName = InitValue; 2893 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \ 2894 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \ 2895 VarName##PtrTy = PointerType::getUnqual(VarName##Ty); 2896 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \ 2897 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \ 2898 VarName##Ptr = PointerType::getUnqual(VarName); 2899 #define OMP_STRUCT_TYPE(VarName, StructName, ...) \ 2900 T = StructType::getTypeByName(Ctx, StructName); \ 2901 if (!T) \ 2902 T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \ 2903 VarName = T; \ 2904 VarName##Ptr = PointerType::getUnqual(T); 2905 #include "llvm/Frontend/OpenMP/OMPKinds.def" 2906 } 2907 2908 void OpenMPIRBuilder::OutlineInfo::collectBlocks( 2909 SmallPtrSetImpl<BasicBlock *> &BlockSet, 2910 SmallVectorImpl<BasicBlock *> &BlockVector) { 2911 SmallVector<BasicBlock *, 32> Worklist; 2912 BlockSet.insert(EntryBB); 2913 BlockSet.insert(ExitBB); 2914 2915 Worklist.push_back(EntryBB); 2916 while (!Worklist.empty()) { 2917 BasicBlock *BB = Worklist.pop_back_val(); 2918 BlockVector.push_back(BB); 2919 for (BasicBlock *SuccBB : successors(BB)) 2920 if (BlockSet.insert(SuccBB).second) 2921 Worklist.push_back(SuccBB); 2922 } 2923 } 2924 2925 void CanonicalLoopInfo::collectControlBlocks( 2926 SmallVectorImpl<BasicBlock *> &BBs) { 2927 // We only count those BBs as control block for which we do not need to 2928 // reverse the CFG, i.e. not the loop body which can contain arbitrary control 2929 // flow. For consistency, this also means we do not add the Body block, which 2930 // is just the entry to the body code. 2931 BBs.reserve(BBs.size() + 6); 2932 BBs.append({Preheader, Header, Cond, Latch, Exit, After}); 2933 } 2934 2935 void CanonicalLoopInfo::assertOK() const { 2936 #ifndef NDEBUG 2937 // No constraints if this object currently does not describe a loop. 2938 if (!isValid()) 2939 return; 2940 2941 // Verify standard control-flow we use for OpenMP loops. 2942 assert(Preheader); 2943 assert(isa<BranchInst>(Preheader->getTerminator()) && 2944 "Preheader must terminate with unconditional branch"); 2945 assert(Preheader->getSingleSuccessor() == Header && 2946 "Preheader must jump to header"); 2947 2948 assert(Header); 2949 assert(isa<BranchInst>(Header->getTerminator()) && 2950 "Header must terminate with unconditional branch"); 2951 assert(Header->getSingleSuccessor() == Cond && 2952 "Header must jump to exiting block"); 2953 2954 assert(Cond); 2955 assert(Cond->getSinglePredecessor() == Header && 2956 "Exiting block only reachable from header"); 2957 2958 assert(isa<BranchInst>(Cond->getTerminator()) && 2959 "Exiting block must terminate with conditional branch"); 2960 assert(size(successors(Cond)) == 2 && 2961 "Exiting block must have two successors"); 2962 assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body && 2963 "Exiting block's first successor jump to the body"); 2964 assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit && 2965 "Exiting block's second successor must exit the loop"); 2966 2967 assert(Body); 2968 assert(Body->getSinglePredecessor() == Cond && 2969 "Body only reachable from exiting block"); 2970 assert(!isa<PHINode>(Body->front())); 2971 2972 assert(Latch); 2973 assert(isa<BranchInst>(Latch->getTerminator()) && 2974 "Latch must terminate with unconditional branch"); 2975 assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header"); 2976 // TODO: To support simple redirecting of the end of the body code that has 2977 // multiple; introduce another auxiliary basic block like preheader and after. 2978 assert(Latch->getSinglePredecessor() != nullptr); 2979 assert(!isa<PHINode>(Latch->front())); 2980 2981 assert(Exit); 2982 assert(isa<BranchInst>(Exit->getTerminator()) && 2983 "Exit block must terminate with unconditional branch"); 2984 assert(Exit->getSingleSuccessor() == After && 2985 "Exit block must jump to after block"); 2986 2987 assert(After); 2988 assert(After->getSinglePredecessor() == Exit && 2989 "After block only reachable from exit block"); 2990 assert(After->empty() || !isa<PHINode>(After->front())); 2991 2992 Instruction *IndVar = getIndVar(); 2993 assert(IndVar && "Canonical induction variable not found?"); 2994 assert(isa<IntegerType>(IndVar->getType()) && 2995 "Induction variable must be an integer"); 2996 assert(cast<PHINode>(IndVar)->getParent() == Header && 2997 "Induction variable must be a PHI in the loop header"); 2998 assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader); 2999 assert( 3000 cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero()); 3001 assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch); 3002 3003 auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1); 3004 assert(cast<Instruction>(NextIndVar)->getParent() == Latch); 3005 assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add); 3006 assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar); 3007 assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1)) 3008 ->isOne()); 3009 3010 Value *TripCount = getTripCount(); 3011 assert(TripCount && "Loop trip count not found?"); 3012 assert(IndVar->getType() == TripCount->getType() && 3013 "Trip count and induction variable must have the same type"); 3014 3015 auto *CmpI = cast<CmpInst>(&Cond->front()); 3016 assert(CmpI->getPredicate() == CmpInst::ICMP_ULT && 3017 "Exit condition must be a signed less-than comparison"); 3018 assert(CmpI->getOperand(0) == IndVar && 3019 "Exit condition must compare the induction variable"); 3020 assert(CmpI->getOperand(1) == TripCount && 3021 "Exit condition must compare with the trip count"); 3022 #endif 3023 } 3024 3025 void CanonicalLoopInfo::invalidate() { 3026 Preheader = nullptr; 3027 Header = nullptr; 3028 Cond = nullptr; 3029 Body = nullptr; 3030 Latch = nullptr; 3031 Exit = nullptr; 3032 After = nullptr; 3033 } 3034