//===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the OpenMPIRBuilder class, which is used as a
/// convenient way to create LLVM instructions for OpenMP directives.
///
//===----------------------------------------------------------------------===//

#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"

#include <sstream>

#define DEBUG_TYPE "openmp-ir-builder"

using namespace llvm;
using namespace omp;

static cl::opt<bool>
    OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
                         cl::desc("Use optimistic attributes describing "
                                  "'as-if' properties of runtime calls."),
                         cl::init(false));

/// Attach the attribute sets listed in OMPKinds.def for runtime function
/// \p FnID to the declaration \p Fn. Pre-existing attributes are preserved;
/// the OMPKinds.def sets are merged on top of them.
void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
  LLVMContext &Ctx = Fn.getContext();

  // Get the function's current attributes.
  auto Attrs = Fn.getAttributes();
  auto FnAttrs = Attrs.getFnAttributes();
  auto RetAttrs = Attrs.getRetAttributes();
  SmallVector<AttributeSet, 4> ArgAttrs;
  for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo)
    ArgAttrs.emplace_back(Attrs.getParamAttributes(ArgNo));

  // Materialize the attribute sets declared via OMP_ATTRS_SET in OMPKinds.def
  // as local variables so the OMP_RTL_ATTRS expansion below can name them.
#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

  // Add attributes to the function declaration.
  switch (FnID) {
#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets)                \
  case Enum:                                                                   \
    FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet);                           \
    RetAttrs = RetAttrs.addAttributes(Ctx, RetAttrSet);                        \
    for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo)                \
      ArgAttrs[ArgNo] =                                                        \
          ArgAttrs[ArgNo].addAttributes(Ctx, ArgAttrSets[ArgNo]);              \
    Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs));    \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  default:
    // Attributes are optional.
    break;
  }
}

/// Return a callee for the OpenMP runtime function \p FnID, creating the
/// declaration in \p M on first use. If a declaration with a mismatching
/// type already exists, the returned callee is bitcast to the expected type.
FunctionCallee
OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M, RuntimeFunction FnID) {
  FunctionType *FnTy = nullptr;
  Function *Fn = nullptr;

  // Try to find the declaration in the module first.
  switch (FnID) {
#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...)                          \
  case Enum:                                                                   \
    FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__},        \
                             IsVarArg);                                        \
    Fn = M.getFunction(Str);                                                   \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  }

  if (!Fn) {
    // Create a new declaration if we need one.
    switch (FnID) {
#define OMP_RTL(Enum, Str, ...)                                                \
  case Enum:                                                                   \
    Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M);         \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
    }

    // Add information if the runtime function takes a callback function
    if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
      if (!Fn->hasMetadata(LLVMContext::MD_callback)) {
        LLVMContext &Ctx = Fn->getContext();
        MDBuilder MDB(Ctx);
        // Annotate the callback behavior of the runtime function:
        //  - The callback callee is argument number 2 (microtask).
        //  - The first two arguments of the callback callee are unknown (-1).
        //  - All variadic arguments to the runtime function are passed to the
        //    callback callee.
        Fn->addMetadata(
            LLVMContext::MD_callback,
            *MDNode::get(Ctx, {MDB.createCallbackEncoding(
                                  2, {-1, -1}, /* VarArgsArePassed */ true)}));
      }
    }

    LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName()
                      << " with type " << *Fn->getFunctionType() << "\n");
    addAttributes(FnID, *Fn);

  } else {
    LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName()
                      << " with type " << *Fn->getFunctionType() << "\n");
  }

  assert(Fn && "Failed to create OpenMP runtime function");

  // Cast the function to the expected type if necessary
  Constant *C = ConstantExpr::getBitCast(Fn, FnTy->getPointerTo());
  return {FnTy, C};
}

/// As getOrCreateRuntimeFunction, but asserts the callee is a plain Function
/// (no type-mismatch bitcast was needed) and returns it directly.
Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
  FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID);
  auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee());
  assert(Fn && "Failed to create OpenMP runtime function pointer");
  return Fn;
}

void OpenMPIRBuilder::initialize() { initializeTypes(M); }

/// Outline all regions registered in OutlineInfos (or, if \p Fn is given,
/// only those belonging to \p Fn) with the CodeExtractor and invoke their
/// post-outline callbacks. Regions of other functions are deferred.
void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) {
  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
  SmallVector<BasicBlock *, 32> Blocks;
  SmallVector<OutlineInfo, 16> DeferredOutlines;
  for (OutlineInfo &OI : OutlineInfos) {
    // Skip functions that have not finalized yet; may happen with nested
    // function generation.
    if (Fn && OI.getFunction() != Fn) {
      DeferredOutlines.push_back(OI);
      continue;
    }

    ParallelRegionBlockSet.clear();
    Blocks.clear();
    OI.collectBlocks(ParallelRegionBlockSet, Blocks);

    Function *OuterFn = OI.getFunction();
    CodeExtractorAnalysisCache CEAC(*OuterFn);
    CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
                            /* AggregateArgs */ false,
                            /* BlockFrequencyInfo */ nullptr,
                            /* BranchProbabilityInfo */ nullptr,
                            /* AssumptionCache */ nullptr,
                            /* AllowVarArgs */ true,
                            /* AllowAlloca */ true,
                            /* Suffix */ ".omp_par");

    LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
    LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
                      << " Exit: " << OI.ExitBB->getName() << "\n");
    assert(Extractor.isEligible() &&
           "Expected OpenMP outlining to be possible!");

    Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);

    LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n");
    LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
    assert(OutlinedFn->getReturnType()->isVoidTy() &&
           "OpenMP outlined functions should not return a value!");

    // For compatibility with the clang CG we move the outlined function after
    // the one with the parallel region.
    OutlinedFn->removeFromParent();
    M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);

    // Remove the artificial entry introduced by the extractor right away, we
    // made our own entry block after all.
    {
      BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
      assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
      assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
      if (AllowExtractorSinking) {
        // Move instructions from the to-be-deleted ArtificialEntry to the entry
        // basic block of the parallel region. CodeExtractor may have sunk
        // allocas/bitcasts for values that are solely used in the outlined
        // region and do not escape.
        assert(!ArtificialEntry.empty() &&
               "Expected instructions to sink in the outlined region");
        for (BasicBlock::iterator It = ArtificialEntry.begin(),
                                  End = ArtificialEntry.end();
             It != End;) {
          Instruction &I = *It;
          // Advance before the (potential) move invalidates nothing, but the
          // iterator must not point at the instruction being relocated.
          It++;

          if (I.isTerminator())
            continue;

          I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
        }
      }
      OI.EntryBB->moveBefore(&ArtificialEntry);
      ArtificialEntry.eraseFromParent();
    }
    assert(&OutlinedFn->getEntryBlock() == OI.EntryBB);
    assert(OutlinedFn && OutlinedFn->getNumUses() == 1);

    // Run a user callback, e.g. to add attributes.
    if (OI.PostOutlineCB)
      OI.PostOutlineCB(*OutlinedFn);
  }

  // Remove work items that have been completed.
  OutlineInfos = std::move(DeferredOutlines);
}

OpenMPIRBuilder::~OpenMPIRBuilder() {
  assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
}

/// Return an ident_t* for the source location string \p SrcLocStr with the
/// given flags. Identical (string, flags) combinations share one global.
Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
                                         IdentFlag LocFlags,
                                         unsigned Reserve2Flags) {
  // Enable "C-mode".
  LocFlags |= OMP_IDENT_FLAG_KMPC;

  // Cache lookup keyed on the location string plus a combination of the
  // (shifted) location flags and Reserve2Flags.
  Value *&Ident =
      IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}];
  if (!Ident) {
    Constant *I32Null = ConstantInt::getNullValue(Int32);
    Constant *IdentData[] = {
        I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)),
        ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr};
    Constant *Initializer = ConstantStruct::get(
        cast<StructType>(IdentPtr->getPointerElementType()), IdentData);

    // Look for existing encoding of the location + flags, not needed but
    // minimizes the difference to the existing solution while we transition.
    for (GlobalVariable &GV : M.getGlobalList())
      if (GV.getType() == IdentPtr && GV.hasInitializer())
        if (GV.getInitializer() == Initializer)
          return Ident = &GV;

    auto *GV = new GlobalVariable(M, IdentPtr->getPointerElementType(),
                                  /* isConstant = */ true,
                                  GlobalValue::PrivateLinkage, Initializer);
    GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    GV->setAlignment(Align(8));
    Ident = GV;
  }
  return Builder.CreatePointerCast(Ident, IdentPtr);
}

/// Return the integer type used for lane masks on the current target triple.
Type *OpenMPIRBuilder::getLanemaskType() {
  LLVMContext &Ctx = M.getContext();
  Triple triple(M.getTargetTriple());

  // This test is adequate until deviceRTL has finer grained lane widths
  return triple.isAMDGCN() ? Type::getInt64Ty(Ctx) : Type::getInt32Ty(Ctx);
}

/// Return an i8* to a (cached) global constant string holding \p LocStr.
Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
  Constant *&SrcLocStr = SrcLocStrMap[LocStr];
  if (!SrcLocStr) {
    Constant *Initializer =
        ConstantDataArray::getString(M.getContext(), LocStr);

    // Look for existing encoding of the location, not needed but minimizes the
    // difference to the existing solution while we transition.
    for (GlobalVariable &GV : M.getGlobalList())
      if (GV.isConstant() && GV.hasInitializer() &&
          GV.getInitializer() == Initializer)
        return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);

    SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "",
                                              /* AddressSpace */ 0, &M);
  }
  return SrcLocStr;
}

/// Build the ";file;function;line;column;;" style location string used by the
/// OpenMP runtime and return the corresponding (cached) global string.
Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName,
                                                StringRef FileName,
                                                unsigned Line,
                                                unsigned Column) {
  SmallString<128> Buffer;
  Buffer.push_back(';');
  Buffer.append(FileName);
  Buffer.push_back(';');
  Buffer.append(FunctionName);
  Buffer.push_back(';');
  Buffer.append(std::to_string(Line));
  Buffer.push_back(';');
  Buffer.append(std::to_string(Column));
  Buffer.push_back(';');
  Buffer.push_back(';');
  return getOrCreateSrcLocStr(Buffer.str());
}

Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() {
  return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
}

/// Derive the runtime location string from the debug location attached to
/// \p Loc; fall back to the default "unknown" string if there is none.
Constant *
OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
  DILocation *DIL = Loc.DL.get();
  if (!DIL)
    return getOrCreateDefaultSrcLocStr();
  StringRef FileName = M.getName();
  if (DIFile *DIF = DIL->getFile())
    if (Optional<StringRef> Source = DIF->getSource())
      FileName = *Source;
  StringRef Function = DIL->getScope()->getSubprogram()->getName();
  // If debug info carries no function name, use the IR function's name.
  Function =
      !Function.empty() ?
      Function : Loc.IP.getBlock()->getParent()->getName();
  return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
                              DIL->getColumn());
}

/// Emit a call to __kmpc_global_thread_num and return the thread-id value.
Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
  return Builder.CreateCall(
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
      "omp_global_thread_num");
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK,
                               bool ForceSimpleCall, bool CheckCancelFlag) {
  if (!updateToLocation(Loc))
    return Loc.IP;
  return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
}

/// Emit the barrier runtime call for directive kind \p Kind. Inside a
/// cancellable parallel region a cancellation barrier is emitted instead
/// (unless \p ForceSimpleCall) and, if \p CheckCancelFlag, its result is
/// checked for cancellation.
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
                                 bool ForceSimpleCall, bool CheckCancelFlag) {
  // Build call __kmpc_cancel_barrier(loc, thread_id) or
  // __kmpc_barrier(loc, thread_id);

  // The ident flags record which construct's implicit/explicit barrier this is.
  IdentFlag BarrierLocFlags;
  switch (Kind) {
  case OMPD_for:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
    break;
  case OMPD_sections:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
    break;
  case OMPD_single:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
    break;
  case OMPD_barrier:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
    break;
  default:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
    break;
  }

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags),
                   getOrCreateThreadID(getOrCreateIdent(SrcLocStr))};

  // If we are in a cancellable parallel region, barriers are cancellation
  // points.
  // TODO: Check why we would force simple calls or to ignore the cancel flag.
  bool UseCancelBarrier =
      !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);

  Value *Result =
      Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
                             UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
                                              : OMPRTL___kmpc_barrier),
                         Args);

  if (UseCancelBarrier && CheckCancelFlag)
    emitCancelationCheckImpl(Result, OMPD_parallel);

  return Builder.saveIP();
}

/// Create an "omp cancel" for \p CanceledDirective, optionally guarded by
/// \p IfCondition. When a parallel region is cancelled, a simple barrier is
/// additionally emitted on the exit path via the exit callback.
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
                              Value *IfCondition,
                              omp::Directive CanceledDirective) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  // LLVM utilities like blocks with terminators.
  auto *UI = Builder.CreateUnreachable();

  Instruction *ThenTI = UI, *ElseTI = nullptr;
  if (IfCondition)
    SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
  Builder.SetInsertPoint(ThenTI);

  Value *CancelKind = nullptr;
  switch (CanceledDirective) {
#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value)                       \
  case DirectiveEnum:                                                          \
    CancelKind = Builder.getInt32(Value);                                      \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  default:
    llvm_unreachable("Unknown cancel kind!");
  }

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
  Value *Result = Builder.CreateCall(
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
  auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) {
    if (CanceledDirective == OMPD_parallel) {
      IRBuilder<>::InsertPointGuard IPG(Builder);
      Builder.restoreIP(IP);
      createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
                    omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
                    /* CheckCancelFlag */ false);
    }
  };

  // The actual cancel logic is shared with others, e.g., cancel_barriers.
  emitCancelationCheckImpl(Result, CanceledDirective, ExitCB);

  // Update the insertion point and remove the terminator we introduced.
  Builder.SetInsertPoint(UI->getParent());
  UI->eraseFromParent();

  return Builder.saveIP();
}

/// Branch on \p CancelFlag: if non-zero, run the optional \p ExitCB and the
/// topmost finalization callback in a fresh ".cncl" block; otherwise resume
/// code generation in a ".cont" continuation block.
void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag,
                                               omp::Directive CanceledDirective,
                                               FinalizeCallbackTy ExitCB) {
  assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
         "Unexpected cancellation!");

  // For a cancel barrier we create two new blocks.
  BasicBlock *BB = Builder.GetInsertBlock();
  BasicBlock *NonCancellationBlock;
  if (Builder.GetInsertPoint() == BB->end()) {
    // TODO: This branch will not be needed once we moved to the
    // OpenMPIRBuilder codegen completely.
    NonCancellationBlock = BasicBlock::Create(
        BB->getContext(), BB->getName() + ".cont", BB->getParent());
  } else {
    // Splitting in the middle of a block: the split gives us the continuation
    // block, and we drop the old terminator to emit our own branch below.
    NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
    BB->getTerminator()->eraseFromParent();
    Builder.SetInsertPoint(BB);
  }
  BasicBlock *CancellationBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".cncl", BB->getParent());

  // Jump to them based on the return value.
  Value *Cmp = Builder.CreateIsNull(CancelFlag);
  Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
                       /* TODO weight */ nullptr, nullptr);

  // From the cancellation block we finalize all variables and go to the
  // post finalization block that is known to the FiniCB callback.
  Builder.SetInsertPoint(CancellationBlock);
  if (ExitCB)
    ExitCB(Builder.saveIP());
  auto &FI = FinalizationStack.back();
  FI.FiniCB(Builder.saveIP());

  // The continuation block is where code generation continues.
  Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
}

/// Create an "omp parallel" region:
///  - emits __kmpc_push_num_threads / __kmpc_push_proc_bind when requested,
///  - splits out entry/body/pre-finalize/exit blocks and lets \p BodyGenCB
///    fill the body and \p PrivCB privatize captured values,
///  - registers the region for outlining; the __kmpc_fork_call (and the
///    serialized fallback under an "if" clause) is emitted by the
///    post-outline callback during finalize().
IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
    const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
    BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
    FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
    omp::ProcBindKind ProcBind, bool IsCancellable) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadID = getOrCreateThreadID(Ident);

  if (NumThreads) {
    // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
    Value *Args[] = {
        Ident, ThreadID,
        Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
    Builder.CreateCall(
        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
  }

  if (ProcBind != OMP_PROC_BIND_default) {
    // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
    Value *Args[] = {
        Ident, ThreadID,
        ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
    Builder.CreateCall(
        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
  }

  BasicBlock *InsertBB = Builder.GetInsertBlock();
  Function *OuterFn = InsertBB->getParent();

  // Save the outer alloca block because the insertion iterator may get
  // invalidated and we still need this later.
  BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();

  // Vector to remember instructions we used only during the modeling but which
  // we want to delete at the end.
  SmallVector<Instruction *, 4> ToBeDeleted;

  // Change the location to the outer alloca insertion point to create and
  // initialize the allocas we pass into the parallel region.
  Builder.restoreIP(OuterAllocaIP);
  AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
  AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");

  // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
  // program, otherwise we only need them for modeling purposes to get the
  // associated arguments in the outlined function. In the former case,
  // initialize the allocas properly, in the latter case, delete them later.
  if (IfCondition) {
    Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
    Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
  } else {
    ToBeDeleted.push_back(TIDAddr);
    ToBeDeleted.push_back(ZeroAddr);
  }

  // Create an artificial insertion point that will also ensure the blocks we
  // are about to split are not degenerated.
  auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);

  Instruction *ThenTI = UI, *ElseTI = nullptr;
  if (IfCondition)
    SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);

  // Carve the region into dedicated entry/body/pre-finalize/exit blocks by
  // repeatedly splitting at the artificial terminator.
  BasicBlock *ThenBB = ThenTI->getParent();
  BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
  BasicBlock *PRegBodyBB =
      PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
  BasicBlock *PRegPreFiniBB =
      PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
  BasicBlock *PRegExitBB =
      PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");

  auto FiniCBWrapper = [&](InsertPointTy IP) {
    // Hide "open-ended" blocks from the given FiniCB by setting the right jump
    // target to the region exit block.
    if (IP.getBlock()->end() == IP.getPoint()) {
      IRBuilder<>::InsertPointGuard IPG(Builder);
      Builder.restoreIP(IP);
      Instruction *I = Builder.CreateBr(PRegExitBB);
      IP = InsertPointTy(I->getParent(), I->getIterator());
    }
    assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
           IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
           "Unexpected insertion point for finalization call!");
    return FiniCB(IP);
  };

  FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});

  // Generate the privatization allocas in the block that will become the entry
  // of the outlined function.
  Builder.SetInsertPoint(PRegEntryBB->getTerminator());
  InsertPointTy InnerAllocaIP = Builder.saveIP();

  AllocaInst *PrivTIDAddr =
      Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
  Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr, "tid");

  // Add some fake uses for OpenMP provided arguments.
  ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"));
  Instruction *ZeroAddrUse = Builder.CreateLoad(Int32, ZeroAddr,
                                                "zero.addr.use");
  ToBeDeleted.push_back(ZeroAddrUse);

  // ThenBB
  //   |
  //   V
  // PRegionEntryBB         <- Privatization allocas are placed here.
  //   |
  //   V
  // PRegionBodyBB          <- BodyGen is invoked here.
  //   |
  //   V
  // PRegPreFiniBB          <- The block we will start finalization from.
  //   |
  //   V
  // PRegionExitBB          <- A common exit to simplify block collection.
  //

  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");

  // Let the caller create the body.
  assert(BodyGenCB && "Expected body generation callback!");
  InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
  BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB);

  LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");

  FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
    if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
      llvm::LLVMContext &Ctx = F->getContext();
      MDBuilder MDB(Ctx);
      // Annotate the callback behavior of the __kmpc_fork_call:
      //  - The callback callee is argument number 2 (microtask).
      //  - The first two arguments of the callback callee are unknown (-1).
      //  - All variadic arguments to the __kmpc_fork_call are passed to the
      //    callback callee.
      F->addMetadata(
          llvm::LLVMContext::MD_callback,
          *llvm::MDNode::get(
              Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
                                               /* VarArgsArePassed */ true)}));
    }
  }

  OutlineInfo OI;
  // Runs after CodeExtractor has outlined the region: replaces the single
  // call to the outlined function with __kmpc_fork_call and, if an "if"
  // clause was present, emits the serialized fallback on the else path.
  OI.PostOutlineCB = [=](Function &OutlinedFn) {
    // Add some known attributes.
    OutlinedFn.addParamAttr(0, Attribute::NoAlias);
    OutlinedFn.addParamAttr(1, Attribute::NoAlias);
    OutlinedFn.addFnAttr(Attribute::NoUnwind);
    OutlinedFn.addFnAttr(Attribute::NoRecurse);

    assert(OutlinedFn.arg_size() >= 2 &&
           "Expected at least tid and bounded tid as arguments");
    unsigned NumCapturedVars =
        OutlinedFn.arg_size() - /* tid & bounded tid */ 2;

    CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
    CI->getParent()->setName("omp_parallel");
    Builder.SetInsertPoint(CI);

    // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
    Value *ForkCallArgs[] = {
        Ident, Builder.getInt32(NumCapturedVars),
        Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};

    SmallVector<Value *, 16> RealArgs;
    RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
    RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());

    Builder.CreateCall(RTLFn, RealArgs);

    LLVM_DEBUG(dbgs() << "With fork_call placed: "
                      << *Builder.GetInsertBlock()->getParent() << "\n");

    InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());

    // Initialize the local TID stack location with the argument value.
    Builder.SetInsertPoint(PrivTID);
    Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
    Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr);

    // If no "if" clause was present we do not need the call created during
    // outlining, otherwise we reuse it in the serialized parallel region.
    if (!ElseTI) {
      CI->eraseFromParent();
    } else {

      // If an "if" clause was present we are now generating the serialized
      // version into the "else" branch.
      Builder.SetInsertPoint(ElseTI);

      // Build calls __kmpc_serialized_parallel(&Ident, GTid);
      Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
      Builder.CreateCall(
          getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel),
          SerializedParallelCallArgs);

      // OutlinedFn(&GTid, &zero, CapturedStruct);
      CI->removeFromParent();
      Builder.Insert(CI);

      // __kmpc_end_serialized_parallel(&Ident, GTid);
      Value *EndArgs[] = {Ident, ThreadID};
      Builder.CreateCall(
          getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel),
          EndArgs);

      LLVM_DEBUG(dbgs() << "With serialized parallel region: "
                        << *Builder.GetInsertBlock()->getParent() << "\n");
    }

    for (Instruction *I : ToBeDeleted)
      I->eraseFromParent();
  };

  // Adjust the finalization stack, verify the adjustment, and call the
  // finalize function a last time to finalize values between the pre-fini
  // block and the exit block if we left the parallel "the normal way".
  auto FiniInfo = FinalizationStack.pop_back_val();
  (void)FiniInfo;
  assert(FiniInfo.DK == OMPD_parallel &&
         "Unexpected finalization stack state!");

  Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();

  InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
  FiniCB(PreFiniIP);

  OI.EntryBB = PRegEntryBB;
  OI.ExitBB = PRegExitBB;

  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
  SmallVector<BasicBlock *, 32> Blocks;
  OI.collectBlocks(ParallelRegionBlockSet, Blocks);

  // Ensure a single exit node for the outlined region by creating one.
  // We might have multiple incoming edges to the exit now due to finalizations,
  // e.g., cancel calls that cause the control flow to leave the region.
  BasicBlock *PRegOutlinedExitBB = PRegExitBB;
  PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
  PRegOutlinedExitBB->setName("omp.par.outlined.exit");
  Blocks.push_back(PRegOutlinedExitBB);

  CodeExtractorAnalysisCache CEAC(*OuterFn);
  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
                          /* AggregateArgs */ false,
                          /* BlockFrequencyInfo */ nullptr,
                          /* BranchProbabilityInfo */ nullptr,
                          /* AssumptionCache */ nullptr,
                          /* AllowVarArgs */ true,
                          /* AllowAlloca */ true,
                          /* Suffix */ ".omp_par");

  // Find inputs to, outputs from the code region.
  BasicBlock *CommonExit = nullptr;
  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);

  LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");

  FunctionCallee TIDRTLFn =
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);

  // Forward one captured value into the region: wrap non-pointer values in a
  // stack slot, then let the user's PrivCB supply a replacement value for all
  // uses inside the region (thread-id calls are replaced by the local TID).
  auto PrivHelper = [&](Value &V) {
    if (&V == TIDAddr || &V == ZeroAddr)
      return;

    SetVector<Use *> Uses;
    for (Use &U : V.uses())
      if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
        if (ParallelRegionBlockSet.count(UserI->getParent()))
          Uses.insert(&U);

    // __kmpc_fork_call expects extra arguments as pointers. If the input
    // already has a pointer type, everything is fine. Otherwise, store the
    // value onto stack and load it back inside the to-be-outlined region. This
    // will ensure only the pointer will be passed to the function.
    // FIXME: if there are more than 15 trailing arguments, they must be
    // additionally packed in a struct.
    Value *Inner = &V;
    if (!V.getType()->isPointerTy()) {
      IRBuilder<>::InsertPointGuard Guard(Builder);
      LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");

      Builder.restoreIP(OuterAllocaIP);
      Value *Ptr =
          Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");

      // Store to stack at end of the block that currently branches to the entry
      // block of the to-be-outlined region.
      Builder.SetInsertPoint(InsertBB,
                             InsertBB->getTerminator()->getIterator());
      Builder.CreateStore(&V, Ptr);

      // Load back next to allocations in the to-be-outlined region.
      Builder.restoreIP(InnerAllocaIP);
      Inner = Builder.CreateLoad(V.getType(), Ptr);
    }

    Value *ReplacementValue = nullptr;
    CallInst *CI = dyn_cast<CallInst>(&V);
    if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
      ReplacementValue = PrivTID;
    } else {
      Builder.restoreIP(
          PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
      assert(ReplacementValue &&
             "Expected copy/create callback to set replacement value!");
      if (ReplacementValue == &V)
        return;
    }

    for (Use *UPtr : Uses)
      UPtr->set(ReplacementValue);
  };

  // Reset the inner alloca insertion as it will be used for loading the values
  // wrapped into pointers before passing them into the to-be-outlined region.
  // Configure it to insert immediately after the fake use of zero address so
  // that they are available in the generated body and so that the
  // OpenMP-related values (thread ID and zero address pointers) remain leading
  // in the argument list.
  InnerAllocaIP = IRBuilder<>::InsertPoint(
      ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());

  // Reset the outer alloca insertion point to the entry of the relevant block
  // in case it was invalidated.
  OuterAllocaIP = IRBuilder<>::InsertPoint(
      OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());

  for (Value *Input : Inputs) {
    LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
    PrivHelper(*Input);
  }
  LLVM_DEBUG({
    for (Value *Output : Outputs)
      LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
  });
  assert(Outputs.empty() &&
         "OpenMP outlining should not produce live-out values!");

  LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
  LLVM_DEBUG({
    for (auto *BB : Blocks)
      dbgs() << " PBR: " << BB->getName() << "\n";
  });

  // Register the outlined info.
  addOutlineInfo(std::move(OI));

  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
  UI->eraseFromParent();

  return AfterIP;
}

/// Emit a call to __kmpc_flush at the current insertion point.
void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) {
  // Build call void __kmpc_flush(ident_t *loc)
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Args[] = {getOrCreateIdent(SrcLocStr)};

  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
}

void OpenMPIRBuilder::createFlush(const LocationDescription &Loc) {
  if (!updateToLocation(Loc))
    return;
  emitFlush(Loc);
}

/// Emit a call to __kmpc_omp_taskwait at the current insertion point.
void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) {
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *Args[] = {Ident, getOrCreateThreadID(Ident)};

  // Ignore return result until untied tasks are supported.
  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
                     Args);
}

void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc) {
  // An unset location means no code should be emitted.
  if (!updateToLocation(Loc))
    return;
  emitTaskwaitImpl(Loc);
}

/// Emit a call to the __kmpc_omp_taskyield runtime function at the current
/// insertion point.
void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) {
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  // The trailing i32 0 argument is the "end_part" parameter of the runtime.
  Constant *I32Null = ConstantInt::getNullValue(Int32);
  Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};

  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
                     Args);
}

void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) {
  // An unset location means no code should be emitted.
  if (!updateToLocation(Loc))
    return;
  emitTaskyieldImpl(Loc);
}

// Emit a 'sections' construct: a worksharing loop over the section bodies
// where each iteration dispatches one section through a switch on the
// induction variable.
// NOTE(review): IsNowait is not referenced anywhere in this implementation;
// confirm whether the nowait clause is intentionally unimplemented here.
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
    const LocationDescription &Loc, InsertPointTy AllocaIP,
    ArrayRef<StorableBodyGenCallbackTy> SectionCBs, PrivatizeCallbackTy PrivCB,
    FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  auto FiniCBWrapper = [&](InsertPointTy IP) {
    // If the insertion point is not at the end of its block, the block still
    // has a terminator and the user callback can run unmodified.
    if (IP.getBlock()->end() != IP.getPoint())
      return FiniCB(IP);
    // This must be done otherwise any nested constructs using FinalizeOMPRegion
    // will fail because that function requires the Finalization Basic Block to
    // have a terminator, which is already removed by EmitOMPRegionBody.
    // IP is currently at cancellation block.
    // We need to backtrack to the condition block to fetch
    // the exit block and create a branch from cancellation
    // to exit block.
    IRBuilder<>::InsertPointGuard IPG(Builder);
    Builder.restoreIP(IP);
    auto *CaseBB = IP.getBlock()->getSinglePredecessor();
    auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
    auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
    Instruction *I = Builder.CreateBr(ExitBB);
    IP = InsertPointTy(I->getParent(), I->getIterator());
    return FiniCB(IP);
  };

  FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable});

  // Each section is emitted as a switch case
  // Each finalization callback is handled from clang.EmitOMPSectionDirective()
  // -> OMP.createSection() which generates the IR for each section
  // Iterate through all sections and emit a switch construct:
  // switch (IV) {
  // case 0:
  //   <SectionStmt[0]>;
  //   break;
  // ...
  // case <NumSection> - 1:
  //   <SectionStmt[<NumSection> - 1]>;
  //   break;
  // }
  // ...
  // section_loop.after:
  // <FiniCB>;
  auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) {
    auto *CurFn = CodeGenIP.getBlock()->getParent();
    auto *ForIncBB = CodeGenIP.getBlock()->getSingleSuccessor();
    auto *ForExitBB = CodeGenIP.getBlock()
                          ->getSinglePredecessor()
                          ->getTerminator()
                          ->getSuccessor(1);
    // Default (no matching case) falls through to the loop increment block.
    SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, ForIncBB);
    Builder.restoreIP(CodeGenIP);
    unsigned CaseNumber = 0;
    for (auto SectionCB : SectionCBs) {
      auto *CaseBB = BasicBlock::Create(M.getContext(),
                                        "omp_section_loop.body.case", CurFn);
      SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
      Builder.SetInsertPoint(CaseBB);
      SectionCB(InsertPointTy(), Builder.saveIP(), *ForExitBB);
      CaseNumber++;
    }
    // remove the existing terminator from body BB since there can be no
    // terminators after switch/case
    CodeGenIP.getBlock()->getTerminator()->eraseFromParent();
  };
  // Loop body ends here
  // LowerBound, UpperBound, and Stride
  // Lower bound, upper bound, and stride arguments for createCanonicalLoop:
  // iterate over [0, NumSections) with step 1.
  Type *I32Ty = Type::getInt32Ty(M.getContext());
  Value *LB = ConstantInt::get(I32Ty, 0);
  Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
  Value *ST = ConstantInt::get(I32Ty, 1);
  llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
      Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
  LoopInfo = createStaticWorkshareLoop(Loc, LoopInfo, AllocaIP, true);
  BasicBlock *LoopAfterBB = LoopInfo->getAfter();
  Instruction *SplitPos = LoopAfterBB->getTerminator();
  // Ensure there is a terminator to split at; a temporary unreachable is used
  // when the after-block is degenerate.
  if (!isa_and_nonnull<BranchInst>(SplitPos))
    SplitPos = new UnreachableInst(Builder.getContext(), LoopAfterBB);
  // ExitBB after LoopAfterBB because LoopAfterBB is used for FinalizationCB,
  // which requires a BB with branch
  BasicBlock *ExitBB =
      LoopAfterBB->splitBasicBlock(SplitPos, "omp_sections.end");
  SplitPos->eraseFromParent();

  // Apply the finalization callback in LoopAfterBB
  auto FiniInfo = FinalizationStack.pop_back_val();
  assert(FiniInfo.DK == OMPD_sections &&
         "Unexpected finalization stack state!");
  Builder.SetInsertPoint(LoopAfterBB->getTerminator());
  FiniInfo.FiniCB(Builder.saveIP());
  Builder.SetInsertPoint(ExitBB);

  return Builder.saveIP();
}

// Emit a single 'section' region inside a 'sections' construct as an inlined
// region (no entry/exit runtime calls).
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createSection(const LocationDescription &Loc,
                               BodyGenCallbackTy BodyGenCB,
                               FinalizeCallbackTy FiniCB) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  auto FiniCBWrapper = [&](InsertPointTy IP) {
    // If the block still has a terminator after the insertion point, the user
    // callback can run unmodified.
    if (IP.getBlock()->end() != IP.getPoint())
      return FiniCB(IP);
    // This must be done otherwise any nested constructs using FinalizeOMPRegion
    // will fail because that function requires the Finalization Basic Block to
    // have a terminator, which is already removed by EmitOMPRegionBody.
    // IP is currently at cancellation block.
    // We need to backtrack to the condition block to fetch
    // the exit block and create a branch from cancellation
    // to exit block.
    IRBuilder<>::InsertPointGuard IPG(Builder);
    Builder.restoreIP(IP);
    auto *CaseBB = Loc.IP.getBlock();
    auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
    auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
    Instruction *I = Builder.CreateBr(ExitBB);
    IP = InsertPointTy(I->getParent(), I->getIterator());
    return FiniCB(IP);
  };

  Directive OMPD = Directive::OMPD_sections;
  // Since we are using Finalization Callback here, HasFinalize
  // and IsCancellable have to be true
  return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper,
                              /*Conditional*/ false, /*hasFinalize*/ true,
                              /*IsCancellable*/ true);
}

// Emit a 'master' region guarded by __kmpc_master / __kmpc_end_master.
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB) {

  if (!updateToLocation(Loc))
    return Loc.IP;

  Directive OMPD = Directive::OMPD_master;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {Ident, ThreadId};

  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);

  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);

  // Conditional: the body only runs when the entry call returns non-zero.
  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ true, /*hasFinalize*/ true);
}

// Emit a 'masked' region guarded by __kmpc_masked / __kmpc_end_masked; Filter
// selects the thread(s) that execute the region.
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB, Value *Filter) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Directive OMPD = Directive::OMPD_masked;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  // The entry call takes the filter; the exit call does not.
  Value *Args[] = {Ident, ThreadId, Filter};
  Value *ArgsEnd[] = {Ident, ThreadId};

  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);

  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ true, /*hasFinalize*/ true);
}

// Materialize the block structure of a canonical loop (preheader, header,
// cond, body, latch, exit, after) without connecting it to the enclosing CFG.
CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
    DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
    BasicBlock *PostInsertBefore, const Twine &Name) {
  Module *M = F->getParent();
  LLVMContext &Ctx = M->getContext();
  Type *IndVarTy = TripCount->getType();

  // Create the basic block structure.
  BasicBlock *Preheader =
      BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore);
  BasicBlock *Header =
      BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore);
  BasicBlock *Cond =
      BasicBlock::Create(Ctx, "omp_" + Name + ".cond", F, PreInsertBefore);
  BasicBlock *Body =
      BasicBlock::Create(Ctx, "omp_" + Name + ".body", F, PreInsertBefore);
  BasicBlock *Latch =
      BasicBlock::Create(Ctx, "omp_" + Name + ".inc", F, PostInsertBefore);
  BasicBlock *Exit =
      BasicBlock::Create(Ctx, "omp_" + Name + ".exit", F, PostInsertBefore);
  BasicBlock *After =
      BasicBlock::Create(Ctx, "omp_" + Name + ".after", F, PostInsertBefore);

  // Use specified DebugLoc for new instructions.
  Builder.SetCurrentDebugLocation(DL);

  Builder.SetInsertPoint(Preheader);
  Builder.CreateBr(Header);

  // Header: induction variable PHI, starting at 0 from the preheader; the
  // second incoming value (from the latch) is added below.
  Builder.SetInsertPoint(Header);
  PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv");
  IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
  Builder.CreateBr(Cond);

  // Cond: keep looping while IV < TripCount (unsigned comparison).
  Builder.SetInsertPoint(Cond);
  Value *Cmp =
      Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp");
  Builder.CreateCondBr(Cmp, Body, Exit);

  Builder.SetInsertPoint(Body);
  Builder.CreateBr(Latch);

  // Latch: increment the IV by one; IV in [0, TripCount) cannot wrap, hence
  // NUW.
  Builder.SetInsertPoint(Latch);
  Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
                                  "omp_" + Name + ".next", /*HasNUW=*/true);
  Builder.CreateBr(Header);
  IndVarPHI->addIncoming(Next, Latch);

  Builder.SetInsertPoint(Exit);
  Builder.CreateBr(After);

  // Remember and return the canonical control flow.
  LoopInfos.emplace_front();
  CanonicalLoopInfo *CL = &LoopInfos.front();

  CL->Preheader = Preheader;
  CL->Header = Header;
  CL->Cond = Cond;
  CL->Body = Body;
  CL->Latch = Latch;
  CL->Exit = Exit;
  CL->After = After;

  CL->IsValid = true;

#ifndef NDEBUG
  CL->assertOK();
#endif
  return CL;
}

// Create a canonical loop iterating [0, TripCount) and splice it into the CFG
// at the given location.
CanonicalLoopInfo *
OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
                                     LoopBodyGenCallbackTy BodyGenCB,
                                     Value *TripCount, const Twine &Name) {
  BasicBlock *BB = Loc.IP.getBlock();
  BasicBlock *NextBB = BB->getNextNode();

  CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(),
                                             NextBB, NextBB, Name);
  BasicBlock *After = CL->getAfter();

  // If location is not set, don't connect the loop.
  if (updateToLocation(Loc)) {
    // Split the loop at the insertion point: Branch to the preheader and move
    // every following instruction to after the loop (the After BB). Also, the
    // new successor is the loop's after block.
    Builder.CreateBr(CL->Preheader);
    After->getInstList().splice(After->begin(), BB->getInstList(),
                                Builder.GetInsertPoint(), BB->end());
    After->replaceSuccessorsPhiUsesWith(BB, After);
  }

  // Emit the body content. We do it after connecting the loop to the CFG to
  // avoid that the callback encounters degenerate BBs.
  BodyGenCB(CL->getBodyIP(), CL->getIndVar());

#ifndef NDEBUG
  CL->assertOK();
#endif
  return CL;
}

// Create a canonical loop from arbitrary Start/Stop/Step bounds by first
// computing a normalized trip count, then mapping the normalized IV back to
// the user's iteration space inside the body callback.
CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop(
    const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
    Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
    InsertPointTy ComputeIP, const Twine &Name) {

  // Consider the following difficulties (assuming 8-bit signed integers):
  // * Adding \p Step to the loop counter which passes \p Stop may overflow:
  //     DO I = 1, 100, 50
  // * A \p Step of INT_MIN cannot be normalized to a positive direction:
  //     DO I = 100, 0, -128

  // Start, Stop and Step must be of the same integer type.
  auto *IndVarTy = cast<IntegerType>(Start->getType());
  assert(IndVarTy == Stop->getType() && "Stop type mismatch");
  assert(IndVarTy == Step->getType() && "Step type mismatch");

  // The trip count may be computed at a dedicated point (e.g. outside a
  // to-be-workshared region) if ComputeIP is set.
  LocationDescription ComputeLoc =
      ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
  updateToLocation(ComputeLoc);

  ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
  ConstantInt *One = ConstantInt::get(IndVarTy, 1);

  // Like Step, but always positive.
  Value *Incr = Step;

  // Distance between Start and Stop; always positive.
  Value *Span;

  // Condition whether there are no iterations are executed at all, e.g. because
  // UB < LB.
  Value *ZeroCmp;

  if (IsSigned) {
    // Ensure that increment is positive. If not, negate and invert LB and UB.
    Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
    Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
    Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
    Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
    Span = Builder.CreateSub(UB, LB, "", false, true);
    ZeroCmp = Builder.CreateICmp(
        InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB);
  } else {
    Span = Builder.CreateSub(Stop, Start, "", true);
    ZeroCmp = Builder.CreateICmp(
        InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start);
  }

  Value *CountIfLooping;
  if (InclusiveStop) {
    CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
  } else {
    // Avoid incrementing past stop since it could overflow.
    Value *CountIfTwo = Builder.CreateAdd(
        Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
    // NOTE(review): InclusiveStop is known false in this branch, so this
    // always selects ICMP_ULE; confirm the ternary is intentional.
    Value *OneCmp = Builder.CreateICmp(
        InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr);
    CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
  }
  Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
                                          "omp_" + Name + ".tripcount");

  // Map the normalized IV back into the user's iteration space:
  //   IndVar = Start + IV * Step.
  auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
    Builder.restoreIP(CodeGenIP);
    Value *Span = Builder.CreateMul(IV, Step);
    Value *IndVar = Builder.CreateAdd(Span, Start);
    BodyGenCB(Builder.saveIP(), IndVar);
  };
  LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
  return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
}

// Returns an LLVM function to call for initializing loop bounds using OpenMP
// static scheduling depending on `type`. Only i32 and i64 are supported by the
// runtime. Always interpret integers as unsigned similarly to
// CanonicalLoopInfo.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M,
                                                  OpenMPIRBuilder &OMPBuilder) {
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  if (Bitwidth == 32)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
  if (Bitwidth == 64)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}

// Sets the number of loop iterations to the given value. This value must be
// valid in the condition block (i.e., defined in the preheader) and is
// interpreted as an unsigned integer.
void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount) {
  // The canonical loop's condition block starts with the IV-vs-tripcount
  // comparison; patch its second operand.
  Instruction *CmpI = &CLI->getCond()->front();
  assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
  CmpI->setOperand(1, TripCount);
  CLI->assertOK();
}

// Lower a canonical loop to an OpenMP worksharing loop with static scheduling
// by calling __kmpc_for_static_init before the loop and __kmpc_for_static_fini
// after it, and rewriting the bounds to this thread's chunk.
CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
    const LocationDescription &Loc, CanonicalLoopInfo *CLI,
    InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk) {
  // Set up the source location value for OpenMP runtime.
  if (!updateToLocation(Loc))
    return nullptr;

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *SrcLoc = getOrCreateIdent(SrcLocStr);

  // Declare useful OpenMP runtime functions.
  Value *IV = CLI->getIndVar();
  Type *IVTy = IV->getType();
  FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
  FunctionCallee StaticFini =
      getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);

  // Allocate space for computed loop bounds as expected by the "init" function.
  Builder.restoreIP(AllocaIP);
  Type *I32Type = Type::getInt32Ty(M.getContext());
  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");

  // At the end of the preheader, prepare for calling the "init" function by
  // storing the current loop bounds into the allocated space. A canonical loop
  // always iterates from 0 to trip-count with step 1. Note that "init" expects
  // and produces an inclusive upper bound.
  Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
  Constant *Zero = ConstantInt::get(IVTy, 0);
  Constant *One = ConstantInt::get(IVTy, 1);
  Builder.CreateStore(Zero, PLowerBound);
  Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
  Builder.CreateStore(UpperBound, PUpperBound);
  Builder.CreateStore(One, PStride);

  if (!Chunk)
    Chunk = One;

  Value *ThreadNum = getOrCreateThreadID(SrcLoc);

  Constant *SchedulingType =
      ConstantInt::get(I32Type, static_cast<int>(OMPScheduleType::Static));

  // Call the "init" function and update the trip count of the loop with the
  // value it produced.
  Builder.CreateCall(StaticInit,
                     {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
                      PUpperBound, PStride, One, Chunk});
  // Reload the (possibly adjusted) bounds and shrink the loop to this thread's
  // chunk: trip count becomes UB - LB + 1.
  Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
  Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
  Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
  Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
  setCanonicalLoopTripCount(CLI, TripCount);

  // Update all uses of the induction variable except the one in the condition
  // block that compares it with the actual upper bound, and the increment in
  // the latch block.
  // TODO: this can eventually move to CanonicalLoopInfo or to a new
  // CanonicalLoopInfoUpdater interface.
  Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt());
  Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound);
  IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) {
    auto *Instr = dyn_cast<Instruction>(U.getUser());
    return !Instr ||
           (Instr->getParent() != CLI->getCond() &&
            Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV);
  });

  // In the "exit" block, call the "fini" function.
  Builder.SetInsertPoint(CLI->getExit(),
                         CLI->getExit()->getTerminator()->getIterator());
  Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});

  // Add the barrier if requested.
  if (NeedsBarrier)
    createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
                  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
                  /* CheckCancelFlag */ false);

  CLI->assertOK();
  return CLI;
}

CanonicalLoopInfo *OpenMPIRBuilder::createWorkshareLoop(
    const LocationDescription &Loc, CanonicalLoopInfo *CLI,
    InsertPointTy AllocaIP, bool NeedsBarrier) {
  // Currently only supports static schedules.
  return createStaticWorkshareLoop(Loc, CLI, AllocaIP, NeedsBarrier);
}

/// Returns an LLVM function to call for initializing loop bounds using OpenMP
/// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
/// the runtime. Always interpret integers as unsigned similarly to
/// CanonicalLoopInfo.
static FunctionCallee
getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  if (Bitwidth == 32)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
  if (Bitwidth == 64)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}

/// Returns an LLVM function to call for updating the next loop using OpenMP
/// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
/// the runtime. Always interpret integers as unsigned similarly to
/// CanonicalLoopInfo.
static FunctionCallee
getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  if (Bitwidth == 32)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
  if (Bitwidth == 64)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}

// Lower a canonical loop to an OpenMP worksharing loop with a dynamic
// schedule: __kmpc_dispatch_init before the loop and an outer loop around it
// that fetches successive chunks via __kmpc_dispatch_next.
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createDynamicWorkshareLoop(
    const LocationDescription &Loc, CanonicalLoopInfo *CLI,
    InsertPointTy AllocaIP, OMPScheduleType SchedType, bool NeedsBarrier,
    Value *Chunk) {
  // Set up the source location value for OpenMP runtime.
  Builder.SetCurrentDebugLocation(Loc.DL);

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *SrcLoc = getOrCreateIdent(SrcLocStr);

  // Declare useful OpenMP runtime functions.
  Value *IV = CLI->getIndVar();
  Type *IVTy = IV->getType();
  FunctionCallee DynamicInit = getKmpcForDynamicInitForType(IVTy, M, *this);
  FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this);

  // Allocate space for computed loop bounds as expected by the "init" function.
  Builder.restoreIP(AllocaIP);
  Type *I32Type = Type::getInt32Ty(M.getContext());
  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");

  // At the end of the preheader, prepare for calling the "init" function by
  // storing the current loop bounds into the allocated space. A canonical loop
  // always iterates from 0 to trip-count with step 1. Note that "init" expects
  // and produces an inclusive upper bound.
  BasicBlock *PreHeader = CLI->getPreheader();
  Builder.SetInsertPoint(PreHeader->getTerminator());
  Constant *One = ConstantInt::get(IVTy, 1);
  Builder.CreateStore(One, PLowerBound);
  Value *UpperBound = CLI->getTripCount();
  Builder.CreateStore(UpperBound, PUpperBound);
  Builder.CreateStore(One, PStride);

  // Grab the blocks we still need before the CFG surgery below.
  BasicBlock *Header = CLI->getHeader();
  BasicBlock *Exit = CLI->getExit();
  BasicBlock *Cond = CLI->getCond();
  InsertPointTy AfterIP = CLI->getAfterIP();

  // The CLI will be "broken" in the code below, as the loop is no longer
  // a valid canonical loop.

  if (!Chunk)
    Chunk = One;

  Value *ThreadNum = getOrCreateThreadID(SrcLoc);

  Constant *SchedulingType =
      ConstantInt::get(I32Type, static_cast<int>(SchedType));

  // Call the "init" function.
  Builder.CreateCall(DynamicInit,
                     {SrcLoc, ThreadNum, SchedulingType, /* LowerBound */ One,
                      UpperBound, /* step */ One, Chunk});

  // An outer loop around the existing one.
  BasicBlock *OuterCond = BasicBlock::Create(
      PreHeader->getContext(), Twine(PreHeader->getName()) + ".outer.cond",
      PreHeader->getParent());
  // This needs to be 32-bit always, so can't use the IVTy Zero above.
  Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
  // dispatch_next returns non-zero while there is another chunk to execute and
  // fills in the per-chunk bounds.
  Value *Res =
      Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
                                       PLowerBound, PUpperBound, PStride});
  Constant *Zero32 = ConstantInt::get(I32Type, 0);
  Value *MoreWork = Builder.CreateCmp(CmpInst::ICMP_NE, Res, Zero32);
  // The runtime's bounds are one-based; subtract one to feed the zero-based
  // canonical IV.
  Value *LowerBound =
      Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One, "lb");
  Builder.CreateCondBr(MoreWork, Header, Exit);

  // Change PHI-node in loop header to use outer cond rather than preheader,
  // and set IV to the LowerBound.
  Instruction *Phi = &Header->front();
  auto *PI = cast<PHINode>(Phi);
  PI->setIncomingBlock(0, OuterCond);
  PI->setIncomingValue(0, LowerBound);

  // Then set the pre-header to jump to the OuterCond
  Instruction *Term = PreHeader->getTerminator();
  auto *Br = cast<BranchInst>(Term);
  Br->setSuccessor(0, OuterCond);

  // Modify the inner condition:
  // * Use the UpperBound returned from the DynamicNext call.
  // * jump to the loop outer loop when done with one of the inner loops.
  Builder.SetInsertPoint(Cond, Cond->getFirstInsertionPt());
  UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub");
  Instruction *Comp = &*Builder.GetInsertPoint();
  auto *CI = cast<CmpInst>(Comp);
  CI->setOperand(1, UpperBound);
  // Redirect the inner exit to branch to outer condition.
  Instruction *Branch = &Cond->back();
  auto *BI = cast<BranchInst>(Branch);
  assert(BI->getSuccessor(1) == Exit);
  BI->setSuccessor(1, OuterCond);

  // Add the barrier if requested.
  if (NeedsBarrier) {
    Builder.SetInsertPoint(&Exit->back());
    createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
                  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
                  /* CheckCancelFlag */ false);
  }

  return AfterIP;
}

/// Make \p Source branch to \p Target.
///
/// Handles two situations:
/// * \p Source already has an unconditional branch.
/// * \p Source is a degenerate block (no terminator because the BB is
///   the current head of the IR construction).
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) {
  if (Instruction *Term = Source->getTerminator()) {
    auto *Br = cast<BranchInst>(Term);
    assert(!Br->isConditional() &&
           "BB's terminator must be an unconditional branch (or degenerate)");
    // Keep PHI nodes in the old successor consistent before re-pointing the
    // edge.
    BasicBlock *Succ = Br->getSuccessor(0);
    Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
    Br->setSuccessor(0, Target);
    return;
  }

  auto *NewBr = BranchInst::Create(Target, Source);
  NewBr->setDebugLoc(DL);
}

/// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
/// after this \p OldTarget will be orphaned.
static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
                                      BasicBlock *NewTarget, DebugLoc DL) {
  // early-inc range: redirectTo mutates the predecessor list while iterating.
  for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget)))
    redirectTo(Pred, NewTarget, DL);
}

/// Determine which blocks in \p BBs are reachable from outside and remove the
/// ones that are not reachable from the function.
static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) {
  SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()};
  // True if BB is used by anything outside the candidate erase set.
  auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
    for (Use &U : BB->uses()) {
      auto *UseInst = dyn_cast<Instruction>(U.getUser());
      if (!UseInst)
        continue;
      if (BBsToErase.count(UseInst->getParent()))
        continue;
      return true;
    }
    return false;
  };

  // Iterate to a fixpoint: keeping one block alive may keep its users alive.
  while (true) {
    bool Changed = false;
    for (BasicBlock *BB : make_early_inc_range(BBsToErase)) {
      if (HasRemainingUses(BB)) {
        BBsToErase.erase(BB);
        Changed = true;
      }
    }
    if (!Changed)
      break;
  }

  SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end());
  DeleteDeadBlocks(BBVec);
}

// Collapse a perfectly-aligned loop nest into a single canonical loop whose
// trip count is the product of the input trip counts; the original induction
// variables are recovered by div/mod of the collapsed IV.
CanonicalLoopInfo *
OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
                               InsertPointTy ComputeIP) {
  assert(Loops.size() >= 1 && "At least one loop required");
  size_t NumLoops = Loops.size();

  // Nothing to do if there is already just one loop.
  if (NumLoops == 1)
    return Loops.front();

  CanonicalLoopInfo *Outermost = Loops.front();
  CanonicalLoopInfo *Innermost = Loops.back();
  BasicBlock *OrigPreheader = Outermost->getPreheader();
  BasicBlock *OrigAfter = Outermost->getAfter();
  Function *F = OrigPreheader->getParent();

  // Setup the IRBuilder for inserting the trip count computation.
  Builder.SetCurrentDebugLocation(DL);
  if (ComputeIP.isSet())
    Builder.restoreIP(ComputeIP);
  else
    Builder.restoreIP(Outermost->getPreheaderIP());

  // Derive the collapsed loop trip count.
  // TODO: Find common/largest indvar type.
  Value *CollapsedTripCount = nullptr;
  for (CanonicalLoopInfo *L : Loops) {
    Value *OrigTripCount = L->getTripCount();
    if (!CollapsedTripCount) {
      CollapsedTripCount = OrigTripCount;
      continue;
    }

    // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
    CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount,
                                           {}, /*HasNUW=*/true);
  }

  // Create the collapsed loop control flow.
  CanonicalLoopInfo *Result =
      createLoopSkeleton(DL, CollapsedTripCount, F,
                         OrigPreheader->getNextNode(), OrigAfter, "collapsed");

  // Build the collapsed loop body code.
  // Start with deriving the input loop induction variables from the collapsed
  // one, using a divmod scheme. To preserve the original loops' order, the
  // innermost loop use the least significant bits.
  Builder.restoreIP(Result->getBodyIP());

  Value *Leftover = Result->getIndVar();
  SmallVector<Value *> NewIndVars;
  NewIndVars.set_size(NumLoops);
  for (int i = NumLoops - 1; i >= 1; --i) {
    Value *OrigTripCount = Loops[i]->getTripCount();

    Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
    NewIndVars[i] = NewIndVar;

    Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
  }
  // Outermost loop gets all the remaining bits.
  NewIndVars[0] = Leftover;

  // Construct the loop body control flow.
  // We progressively construct the branch structure following in direction of
  // the control flow, from the leading in-between code, the loop nest body, the
  // trailing in-between code, and rejoining the collapsed loop's latch.
  // ContinueBlock and ContinuePred keep track of the source(s) of next edge. If
  // the ContinueBlock is set, continue with that block. If ContinuePred, use
  // its predecessors as sources.
  BasicBlock *ContinueBlock = Result->getBody();
  BasicBlock *ContinuePred = nullptr;
  auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
                                                          BasicBlock *NextSrc) {
    if (ContinueBlock)
      redirectTo(ContinueBlock, Dest, DL);
    else
      redirectAllPredecessorsTo(ContinuePred, Dest, DL);

    ContinueBlock = nullptr;
    ContinuePred = NextSrc;
  };

  // The code before the nested loop of each level.
  // Because we are sinking it into the nest, it will be executed more often
  // that the original loop. More sophisticated schemes could keep track of what
  // the in-between code is and instantiate it only once per thread.
  for (size_t i = 0; i < NumLoops - 1; ++i)
    ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());

  // Connect the loop nest body.
  ContinueWith(Innermost->getBody(), Innermost->getLatch());

  // The code after the nested loop at each level.
  for (size_t i = NumLoops - 1; i > 0; --i)
    ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());

  // Connect the finished loop to the collapsed loop latch.
  ContinueWith(Result->getLatch(), nullptr);

  // Replace the input loops with the new collapsed loop.
  redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL);
  redirectTo(Result->getAfter(), Outermost->getAfter(), DL);

  // Replace the input loop indvars with the derived ones.
  for (size_t i = 0; i < NumLoops; ++i)
    Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);

  // Remove unused parts of the input loops.
  SmallVector<BasicBlock *, 12> OldControlBBs;
  OldControlBBs.reserve(6 * Loops.size());
  for (CanonicalLoopInfo *Loop : Loops)
    Loop->collectControlBlocks(OldControlBBs);
  removeUnusedBlocksFromParent(OldControlBBs);

#ifndef NDEBUG
  Result->assertOK();
#endif
  return Result;
}

// Tile a perfectly-nested loop nest; produces 2*N loops (N floor loops over
// the tiles and N tile loops within a tile).
std::vector<CanonicalLoopInfo *>
OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
                           ArrayRef<Value *> TileSizes) {
  assert(TileSizes.size() == Loops.size() &&
         "Must pass as many tile sizes as there are loops");
  int NumLoops = Loops.size();
  assert(NumLoops >= 1 && "At least one loop to tile required");

  CanonicalLoopInfo *OutermostLoop = Loops.front();
  CanonicalLoopInfo *InnermostLoop = Loops.back();
  Function *F = OutermostLoop->getBody()->getParent();
  BasicBlock *InnerEnter = InnermostLoop->getBody();
  BasicBlock *InnerLatch = InnermostLoop->getLatch();

  // Collect original trip counts and induction variable to be accessible by
  // index. Also, the structure of the original loops is not preserved during
  // the construction of the tiled loops, so do it before we scavenge the BBs of
  // any original CanonicalLoopInfo.
  SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
  for (CanonicalLoopInfo *L : Loops) {
    OrigTripCounts.push_back(L->getTripCount());
    OrigIndVars.push_back(L->getIndVar());
  }

  // Collect the code between loop headers. These may contain SSA definitions
  // that are used in the loop nest body. To be usable with in the innermost
  // body, these BasicBlocks will be sunk into the loop nest body. That is,
  // these instructions may be executed more often than before the tiling.
  // TODO: It would be sufficient to only sink them into body of the
  // corresponding tile loop.
1718 SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode; 1719 for (int i = 0; i < NumLoops - 1; ++i) { 1720 CanonicalLoopInfo *Surrounding = Loops[i]; 1721 CanonicalLoopInfo *Nested = Loops[i + 1]; 1722 1723 BasicBlock *EnterBB = Surrounding->getBody(); 1724 BasicBlock *ExitBB = Nested->getHeader(); 1725 InbetweenCode.emplace_back(EnterBB, ExitBB); 1726 } 1727 1728 // Compute the trip counts of the floor loops. 1729 Builder.SetCurrentDebugLocation(DL); 1730 Builder.restoreIP(OutermostLoop->getPreheaderIP()); 1731 SmallVector<Value *, 4> FloorCount, FloorRems; 1732 for (int i = 0; i < NumLoops; ++i) { 1733 Value *TileSize = TileSizes[i]; 1734 Value *OrigTripCount = OrigTripCounts[i]; 1735 Type *IVType = OrigTripCount->getType(); 1736 1737 Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize); 1738 Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize); 1739 1740 // 0 if tripcount divides the tilesize, 1 otherwise. 1741 // 1 means we need an additional iteration for a partial tile. 1742 // 1743 // Unfortunately we cannot just use the roundup-formula 1744 // (tripcount + tilesize - 1)/tilesize 1745 // because the summation might overflow. We do not want introduce undefined 1746 // behavior when the untiled loop nest did not. 1747 Value *FloorTripOverflow = 1748 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0)); 1749 1750 FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType); 1751 FloorTripCount = 1752 Builder.CreateAdd(FloorTripCount, FloorTripOverflow, 1753 "omp_floor" + Twine(i) + ".tripcount", true); 1754 1755 // Remember some values for later use. 1756 FloorCount.push_back(FloorTripCount); 1757 FloorRems.push_back(FloorTripRem); 1758 } 1759 1760 // Generate the new loop nest, from the outermost to the innermost. 1761 std::vector<CanonicalLoopInfo *> Result; 1762 Result.reserve(NumLoops * 2); 1763 1764 // The basic block of the surrounding loop that enters the nest generated 1765 // loop. 
1766 BasicBlock *Enter = OutermostLoop->getPreheader(); 1767 1768 // The basic block of the surrounding loop where the inner code should 1769 // continue. 1770 BasicBlock *Continue = OutermostLoop->getAfter(); 1771 1772 // Where the next loop basic block should be inserted. 1773 BasicBlock *OutroInsertBefore = InnermostLoop->getExit(); 1774 1775 auto EmbeddNewLoop = 1776 [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore]( 1777 Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * { 1778 CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton( 1779 DL, TripCount, F, InnerEnter, OutroInsertBefore, Name); 1780 redirectTo(Enter, EmbeddedLoop->getPreheader(), DL); 1781 redirectTo(EmbeddedLoop->getAfter(), Continue, DL); 1782 1783 // Setup the position where the next embedded loop connects to this loop. 1784 Enter = EmbeddedLoop->getBody(); 1785 Continue = EmbeddedLoop->getLatch(); 1786 OutroInsertBefore = EmbeddedLoop->getLatch(); 1787 return EmbeddedLoop; 1788 }; 1789 1790 auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts, 1791 const Twine &NameBase) { 1792 for (auto P : enumerate(TripCounts)) { 1793 CanonicalLoopInfo *EmbeddedLoop = 1794 EmbeddNewLoop(P.value(), NameBase + Twine(P.index())); 1795 Result.push_back(EmbeddedLoop); 1796 } 1797 }; 1798 1799 EmbeddNewLoops(FloorCount, "floor"); 1800 1801 // Within the innermost floor loop, emit the code that computes the tile 1802 // sizes. 1803 Builder.SetInsertPoint(Enter->getTerminator()); 1804 SmallVector<Value *, 4> TileCounts; 1805 for (int i = 0; i < NumLoops; ++i) { 1806 CanonicalLoopInfo *FloorLoop = Result[i]; 1807 Value *TileSize = TileSizes[i]; 1808 1809 Value *FloorIsEpilogue = 1810 Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]); 1811 Value *TileTripCount = 1812 Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize); 1813 1814 TileCounts.push_back(TileTripCount); 1815 } 1816 1817 // Create the tile loops. 
1818 EmbeddNewLoops(TileCounts, "tile"); 1819 1820 // Insert the inbetween code into the body. 1821 BasicBlock *BodyEnter = Enter; 1822 BasicBlock *BodyEntered = nullptr; 1823 for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) { 1824 BasicBlock *EnterBB = P.first; 1825 BasicBlock *ExitBB = P.second; 1826 1827 if (BodyEnter) 1828 redirectTo(BodyEnter, EnterBB, DL); 1829 else 1830 redirectAllPredecessorsTo(BodyEntered, EnterBB, DL); 1831 1832 BodyEnter = nullptr; 1833 BodyEntered = ExitBB; 1834 } 1835 1836 // Append the original loop nest body into the generated loop nest body. 1837 if (BodyEnter) 1838 redirectTo(BodyEnter, InnerEnter, DL); 1839 else 1840 redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL); 1841 redirectAllPredecessorsTo(InnerLatch, Continue, DL); 1842 1843 // Replace the original induction variable with an induction variable computed 1844 // from the tile and floor induction variables. 1845 Builder.restoreIP(Result.back()->getBodyIP()); 1846 for (int i = 0; i < NumLoops; ++i) { 1847 CanonicalLoopInfo *FloorLoop = Result[i]; 1848 CanonicalLoopInfo *TileLoop = Result[NumLoops + i]; 1849 Value *OrigIndVar = OrigIndVars[i]; 1850 Value *Size = TileSizes[i]; 1851 1852 Value *Scale = 1853 Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true); 1854 Value *Shift = 1855 Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true); 1856 OrigIndVar->replaceAllUsesWith(Shift); 1857 } 1858 1859 // Remove unused parts of the original loops. 
1860 SmallVector<BasicBlock *, 12> OldControlBBs; 1861 OldControlBBs.reserve(6 * Loops.size()); 1862 for (CanonicalLoopInfo *Loop : Loops) 1863 Loop->collectControlBlocks(OldControlBBs); 1864 removeUnusedBlocksFromParent(OldControlBBs); 1865 1866 #ifndef NDEBUG 1867 for (CanonicalLoopInfo *GenL : Result) 1868 GenL->assertOK(); 1869 #endif 1870 return Result; 1871 } 1872 1873 OpenMPIRBuilder::InsertPointTy 1874 OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc, 1875 llvm::Value *BufSize, llvm::Value *CpyBuf, 1876 llvm::Value *CpyFn, llvm::Value *DidIt) { 1877 if (!updateToLocation(Loc)) 1878 return Loc.IP; 1879 1880 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 1881 Value *Ident = getOrCreateIdent(SrcLocStr); 1882 Value *ThreadId = getOrCreateThreadID(Ident); 1883 1884 llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt); 1885 1886 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD}; 1887 1888 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate); 1889 Builder.CreateCall(Fn, Args); 1890 1891 return Builder.saveIP(); 1892 } 1893 1894 OpenMPIRBuilder::InsertPointTy 1895 OpenMPIRBuilder::createSingle(const LocationDescription &Loc, 1896 BodyGenCallbackTy BodyGenCB, 1897 FinalizeCallbackTy FiniCB, llvm::Value *DidIt) { 1898 1899 if (!updateToLocation(Loc)) 1900 return Loc.IP; 1901 1902 // If needed (i.e. 
not null), initialize `DidIt` with 0 1903 if (DidIt) { 1904 Builder.CreateStore(Builder.getInt32(0), DidIt); 1905 } 1906 1907 Directive OMPD = Directive::OMPD_single; 1908 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 1909 Value *Ident = getOrCreateIdent(SrcLocStr); 1910 Value *ThreadId = getOrCreateThreadID(Ident); 1911 Value *Args[] = {Ident, ThreadId}; 1912 1913 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single); 1914 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args); 1915 1916 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single); 1917 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); 1918 1919 // generates the following: 1920 // if (__kmpc_single()) { 1921 // .... single region ... 1922 // __kmpc_end_single 1923 // } 1924 1925 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, 1926 /*Conditional*/ true, /*hasFinalize*/ true); 1927 } 1928 1929 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical( 1930 const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, 1931 FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) { 1932 1933 if (!updateToLocation(Loc)) 1934 return Loc.IP; 1935 1936 Directive OMPD = Directive::OMPD_critical; 1937 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 1938 Value *Ident = getOrCreateIdent(SrcLocStr); 1939 Value *ThreadId = getOrCreateThreadID(Ident); 1940 Value *LockVar = getOMPCriticalRegionLock(CriticalName); 1941 Value *Args[] = {Ident, ThreadId, LockVar}; 1942 1943 SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), std::end(Args)); 1944 Function *RTFn = nullptr; 1945 if (HintInst) { 1946 // Add Hint to entry Args and create call 1947 EnterArgs.push_back(HintInst); 1948 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint); 1949 } else { 1950 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical); 1951 } 1952 Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs); 1953 1954 
  Function *ExitRTLFn =
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ false, /*hasFinalize*/ true);
}

// Emit an inlined (non-outlined) OpenMP region: place EntryCall, run BodyGenCB
// to fill the region, then place ExitCall followed by any finalization pushed
// on FinalizationStack. If Conditional, the region is only entered when
// EntryCall returns non-zero (see emitCommonDirectiveEntry).
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
    Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
    BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
    bool HasFinalize, bool IsCancellable) {

  if (HasFinalize)
    FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});

  // Create inlined region's entry and body blocks, in preparation
  // for conditional creation
  BasicBlock *EntryBB = Builder.GetInsertBlock();
  Instruction *SplitPos = EntryBB->getTerminator();
  // If the entry block has no branch terminator, insert a placeholder
  // unreachable so splitBasicBlock below has a split point.
  if (!isa_and_nonnull<BranchInst>(SplitPos))
    SplitPos = new UnreachableInst(Builder.getContext(), EntryBB);
  BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end");
  BasicBlock *FiniBB =
      EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize");

  Builder.SetInsertPoint(EntryBB->getTerminator());
  emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);

  // generate body
  BodyGenCB(/* AllocaIP */ InsertPointTy(),
            /* CodeGenIP */ Builder.saveIP(), *FiniBB);

  // If we didn't emit a branch to FiniBB during body generation, it means
  // FiniBB is unreachable (e.g. while(1);). stop generating all the
  // unreachable blocks, and remove anything we are not going to use.
  auto SkipEmittingRegion = FiniBB->hasNPredecessors(0);
  if (SkipEmittingRegion) {
    FiniBB->eraseFromParent();
    ExitCall->eraseFromParent();
    // Discard finalization if we have it.
    if (HasFinalize) {
      assert(!FinalizationStack.empty() &&
             "Unexpected finalization stack state!");
      FinalizationStack.pop_back();
    }
  } else {
    // emit exit call and do any needed finalization.
    auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
    assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
           FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
           "Unexpected control flow graph state!!");
    emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
    assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
           "Unexpected Control Flow State!");
    MergeBlockIntoPredecessor(FiniBB);
  }

  // If we are skipping the region of a non conditional, remove the exit
  // block, and clear the builder's insertion point.
  assert(SplitPos->getParent() == ExitBB &&
         "Unexpected Insertion point location!");
  if (!Conditional && SkipEmittingRegion) {
    ExitBB->eraseFromParent();
    Builder.ClearInsertionPoint();
  } else {
    auto merged = MergeBlockIntoPredecessor(ExitBB);
    BasicBlock *ExitPredBB = SplitPos->getParent();
    auto InsertBB = merged ? ExitPredBB : ExitBB;
    // Remove the placeholder unreachable created above, if we made one.
    if (!isa_and_nonnull<BranchInst>(SplitPos))
      SplitPos->eraseFromParent();
    Builder.SetInsertPoint(InsertBB);
  }

  return Builder.saveIP();
}

// Emit the (possibly conditional) entry of an inlined region: when Conditional
// is set, branch into the region body only if EntryCall's result is non-zero,
// otherwise fall through to ExitBB.
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
    Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {
  // if nothing to do, Return current insertion point.
  if (!Conditional || !EntryCall)
    return Builder.saveIP();

  BasicBlock *EntryBB = Builder.GetInsertBlock();
  Value *CallBool = Builder.CreateIsNotNull(EntryCall);
  auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body");
  // Temporary terminator so ThenBB is well-formed while we splice into it.
  auto *UI = new UnreachableInst(Builder.getContext(), ThenBB);

  // Emit thenBB and set the Builder's insertion point there for
  // body generation next. Place the block after the current block.
  Function *CurFn = EntryBB->getParent();
  CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB);

  // Move Entry branch to end of ThenBB, and replace with conditional
  // branch (If-stmt)
  Instruction *EntryBBTI = EntryBB->getTerminator();
  Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
  EntryBBTI->removeFromParent();
  Builder.SetInsertPoint(UI);
  Builder.Insert(EntryBBTI);
  UI->eraseFromParent();
  Builder.SetInsertPoint(ThenBB->getTerminator());

  // return an insertion point to ExitBB.
  return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt());
}

// Emit the exit of an inlined region: run any pending finalization callback
// for OMPD, then place ExitCall just before the finalization block terminator.
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
    omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
    bool HasFinalize) {

  Builder.restoreIP(FinIP);

  // If there is finalization to do, emit it before the exit call
  if (HasFinalize) {
    assert(!FinalizationStack.empty() &&
           "Unexpected finalization stack state!");

    FinalizationInfo Fi = FinalizationStack.pop_back_val();
    assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");

    Fi.FiniCB(FinIP);

    BasicBlock *FiniBB = FinIP.getBlock();
    Instruction *FiniBBTI = FiniBB->getTerminator();

    // set Builder IP for call creation
    Builder.SetInsertPoint(FiniBBTI);
  }

  if (!ExitCall)
    return Builder.saveIP();

  // place the Exitcall as last instruction before Finalization block terminator
  ExitCall->removeFromParent();
  Builder.Insert(ExitCall);

  return IRBuilder<>::InsertPoint(ExitCall->getParent(),
                                  ExitCall->getIterator());
}

// Emit the control flow for a 'copyin' clause: compare the master and private
// addresses and, when they differ, branch into a block where the caller emits
// the actual copy. Returns an insertion point inside that copy block.
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
    InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr,
    llvm::IntegerType *IntPtrTy, bool BranchtoEnd) {
  if (!IP.isSet())
    return IP;

  IRBuilder<>::InsertPointGuard IPG(Builder);

  // creates the following CFG structure
  //	   OMP_Entry : (MasterAddr != PrivateAddr)?
  //       F     T
  //       |      \
  //       |     copyin.not.master
  //       |      /
  //       v     /
  //   copyin.not.master.end
  //		     |
  //         v
  //   OMP.Entry.Next

  BasicBlock *OMP_Entry = IP.getBlock();
  Function *CurFn = OMP_Entry->getParent();
  BasicBlock *CopyBegin =
      BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn);
  BasicBlock *CopyEnd = nullptr;

  // If entry block is terminated, split to preserve the branch to following
  // basic block (i.e. OMP.Entry.Next), otherwise, leave everything as is.
  if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) {
    CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(),
                                         "copyin.not.master.end");
    OMP_Entry->getTerminator()->eraseFromParent();
  } else {
    CopyEnd =
        BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn);
  }

  Builder.SetInsertPoint(OMP_Entry);
  Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
  Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
  Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
  Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);

  Builder.SetInsertPoint(CopyBegin);
  if (BranchtoEnd)
    Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));

  return Builder.saveIP();
}

// Emit a call to the __kmpc_alloc runtime entry allocating Size bytes from
// the given allocator.
CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
                                          Value *Size, Value *Allocator,
                                          std::string Name) {
  IRBuilder<>::InsertPointGuard IPG(Builder);
  Builder.restoreIP(Loc.IP);

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {ThreadId, Size, Allocator};

  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);

  return Builder.CreateCall(Fn, Args, Name);
}

// Emit a call to the __kmpc_free runtime entry releasing Addr back to the
// given allocator.
CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
                                         Value *Addr, Value *Allocator,
                                         std::string Name) {
  IRBuilder<>::InsertPointGuard IPG(Builder);
  Builder.restoreIP(Loc.IP);

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {ThreadId, Addr, Allocator};
  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
  return Builder.CreateCall(Fn, Args, Name);
}

// Emit a call to __kmpc_threadprivate_cached for the given pointer/size, using
// an internal global as the per-variable cache.
CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
    const LocationDescription &Loc, llvm::Value *Pointer,
    llvm::ConstantInt *Size, const llvm::Twine &Name) {
  IRBuilder<>::InsertPointGuard IPG(Builder);
  Builder.restoreIP(Loc.IP);

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Constant *ThreadPrivateCache =
      getOrCreateOMPInternalVariable(Int8PtrPtr, Name);
  llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};

  Function *Fn =
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);

  return Builder.CreateCall(Fn, Args);
}

// Join Parts with Separator, prefixing the whole name with FirstSeparator
// (e.g. {".", "a", "b"} -> ".a.b").
std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts,
                                                   StringRef FirstSeparator,
                                                   StringRef Separator) {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str().str();
}

// Return (creating on first use) the module-level global used internally by
// the OpenMP runtime for Name; cached in InternalVars.
Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // TODO: Replace the twine arg with stringref to get rid of the conversion
  // logic. However, this is taken from current implementation in clang as is.
  // Since this method is used in many places exclusively for OMP internal use
  // we will keep it as is for temporarily until we move all users to the
  // builder and then, if possible, fix it everywhere in one go.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    // Cache hit: the variable already exists; it must have the requested type.
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
  } else {
    // TODO: investigate the appropriate linkage type used for the global
    // variable for possibly changing that to internal or private, or maybe
    // create different versions of the function for different OMP internal
    // variables.
    Elem.second = new llvm::GlobalVariable(
        M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage,
        llvm::Constant::getNullValue(Ty), Elem.first(),
        /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
        AddressSpace);
  }

  return Elem.second;
}

// Return the internal global lock variable protecting the critical region
// with the given name.
Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getNameWithSeparators({Prefix, "var"}, ".", ".");
  return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name);
}

// Emit a private constant global array holding the offload map-type flags.
GlobalVariable *
OpenMPIRBuilder::createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
                                       std::string VarName) {
  llvm::Constant *MaptypesArrayInit =
      llvm::ConstantDataArray::get(M.getContext(), Mappings);
  auto *MaptypesArrayGlobal = new llvm::GlobalVariable(
      M, MaptypesArrayInit->getType(),
      /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MaptypesArrayInit,
      VarName);
  MaptypesArrayGlobal->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
  return MaptypesArrayGlobal;
}

// Emit a flush (memory fence) after an atomic construct when the requested
// ordering demands one for the given kind of atomic access. Returns whether a
// flush was emitted.
bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
    const LocationDescription &Loc, llvm::AtomicOrdering AO, AtomicKind AK) {
  assert(!(AO == AtomicOrdering::NotAtomic ||
           AO == llvm::AtomicOrdering::Unordered) &&
         "Unexpected Atomic Ordering.");

  bool Flush = false;
  llvm::AtomicOrdering FlushAO = AtomicOrdering::Monotonic;

  switch (AK) {
  case Read:
    if (AO == AtomicOrdering::Acquire || AO == AtomicOrdering::AcquireRelease ||
        AO == AtomicOrdering::SequentiallyConsistent) {
      FlushAO = AtomicOrdering::Acquire;
      Flush = true;
    }
    break;
  case Write:
  case Update:
    if (AO == AtomicOrdering::Release || AO == AtomicOrdering::AcquireRelease ||
        AO == AtomicOrdering::SequentiallyConsistent) {
      FlushAO = AtomicOrdering::Release;
      Flush = true;
    }
    break;
  case Capture:
    switch (AO) {
    case AtomicOrdering::Acquire:
      FlushAO = AtomicOrdering::Acquire;
      Flush = true;
      break;
    case AtomicOrdering::Release:
      FlushAO = AtomicOrdering::Release;
      Flush = true;
      break;
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      FlushAO = AtomicOrdering::AcquireRelease;
      Flush = true;
      break;
    default:
      // do nothing - leave silently.
      break;
    }
  }

  if (Flush) {
    // Currently Flush RT call still doesn't take memory_ordering, so for when
    // that happens, this tries to do the resolution of which atomic ordering
    // to use with but issue the flush call
    // TODO: pass `FlushAO` after memory ordering support is added
    (void)FlushAO;
    emitFlush(Loc);
  }

  // for AO == AtomicOrdering::Monotonic and all other case combinations
  // do nothing
  return Flush;
}

// Emit an OpenMP 'atomic read': atomically load *X.Var and store the result
// to V.Var. Non-integer scalars are loaded through an integer bitcast.
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
                                  AtomicOpValue &X, AtomicOpValue &V,
                                  AtomicOrdering AO) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Type *XTy = X.Var->getType();
  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
  Type *XElemTy = XTy->getPointerElementType();
  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
          XElemTy->isPointerTy()) &&
         "OMP atomic read expected a scalar type");

  Value *XRead = nullptr;

  if (XElemTy->isIntegerTy()) {
    LoadInst *XLD =
        Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read");
    XLD->setAtomic(AO);
    XRead = cast<Value>(XLD);
  } else {
    // We need to bitcast and perform atomic op as integer
    unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
    IntegerType *IntCastTy =
        IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
    Value *XBCast = Builder.CreateBitCast(
        X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.src.int.cast");
    LoadInst *XLoad =
        Builder.CreateLoad(IntCastTy, XBCast, X.IsVolatile, "omp.atomic.load");
    XLoad->setAtomic(AO);
    if (XElemTy->isFloatingPointTy()) {
      XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast");
    } else {
      XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast");
    }
  }
checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read); 2354 Builder.CreateStore(XRead, V.Var, V.IsVolatile); 2355 return Builder.saveIP(); 2356 } 2357 2358 OpenMPIRBuilder::InsertPointTy 2359 OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc, 2360 AtomicOpValue &X, Value *Expr, 2361 AtomicOrdering AO) { 2362 if (!updateToLocation(Loc)) 2363 return Loc.IP; 2364 2365 Type *XTy = X.Var->getType(); 2366 assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory"); 2367 Type *XElemTy = XTy->getPointerElementType(); 2368 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || 2369 XElemTy->isPointerTy()) && 2370 "OMP atomic write expected a scalar type"); 2371 2372 if (XElemTy->isIntegerTy()) { 2373 StoreInst *XSt = Builder.CreateStore(Expr, X.Var, X.IsVolatile); 2374 XSt->setAtomic(AO); 2375 } else { 2376 // We need to bitcast and perform atomic op as integers 2377 unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace(); 2378 IntegerType *IntCastTy = 2379 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits()); 2380 Value *XBCast = Builder.CreateBitCast( 2381 X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.dst.int.cast"); 2382 Value *ExprCast = 2383 Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast"); 2384 StoreInst *XSt = Builder.CreateStore(ExprCast, XBCast, X.IsVolatile); 2385 XSt->setAtomic(AO); 2386 } 2387 2388 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write); 2389 return Builder.saveIP(); 2390 } 2391 2392 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate( 2393 const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, 2394 Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, 2395 AtomicUpdateCallbackTy &UpdateOp, bool IsXLHSInRHSPart) { 2396 if (!updateToLocation(Loc)) 2397 return Loc.IP; 2398 2399 LLVM_DEBUG({ 2400 Type *XTy = X.Var->getType(); 2401 assert(XTy->isPointerTy() && 2402 "OMP Atomic expects a pointer to target memory"); 2403 Type 
*XElemTy = XTy->getPointerElementType(); 2404 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || 2405 XElemTy->isPointerTy()) && 2406 "OMP atomic update expected a scalar type"); 2407 assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) && 2408 (RMWOp != AtomicRMWInst::UMax) && (RMWOp != AtomicRMWInst::UMin) && 2409 "OpenMP atomic does not support LT or GT operations"); 2410 }); 2411 2412 emitAtomicUpdate(AllocIP, X.Var, Expr, AO, RMWOp, UpdateOp, X.IsVolatile, 2413 IsXLHSInRHSPart); 2414 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update); 2415 return Builder.saveIP(); 2416 } 2417 2418 Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2, 2419 AtomicRMWInst::BinOp RMWOp) { 2420 switch (RMWOp) { 2421 case AtomicRMWInst::Add: 2422 return Builder.CreateAdd(Src1, Src2); 2423 case AtomicRMWInst::Sub: 2424 return Builder.CreateSub(Src1, Src2); 2425 case AtomicRMWInst::And: 2426 return Builder.CreateAnd(Src1, Src2); 2427 case AtomicRMWInst::Nand: 2428 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2)); 2429 case AtomicRMWInst::Or: 2430 return Builder.CreateOr(Src1, Src2); 2431 case AtomicRMWInst::Xor: 2432 return Builder.CreateXor(Src1, Src2); 2433 case AtomicRMWInst::Xchg: 2434 case AtomicRMWInst::FAdd: 2435 case AtomicRMWInst::FSub: 2436 case AtomicRMWInst::BAD_BINOP: 2437 case AtomicRMWInst::Max: 2438 case AtomicRMWInst::Min: 2439 case AtomicRMWInst::UMax: 2440 case AtomicRMWInst::UMin: 2441 llvm_unreachable("Unsupported atomic update operation"); 2442 } 2443 llvm_unreachable("Unsupported atomic update operation"); 2444 } 2445 2446 std::pair<Value *, Value *> 2447 OpenMPIRBuilder::emitAtomicUpdate(Instruction *AllocIP, Value *X, Value *Expr, 2448 AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, 2449 AtomicUpdateCallbackTy &UpdateOp, 2450 bool VolatileX, bool IsXLHSInRHSPart) { 2451 Type *XElemTy = X->getType()->getPointerElementType(); 2452 2453 bool DoCmpExch = 2454 ((RMWOp == AtomicRMWInst::BAD_BINOP) || 
                       // (continuation of emitAtomicUpdate — the head of this
                       // definition lies above this chunk; presumably this is
                       // the initializer of DoCmpExch used below)
                       (RMWOp == AtomicRMWInst::FAdd)) ||
                      (RMWOp == AtomicRMWInst::FSub) ||
                      (RMWOp == AtomicRMWInst::Sub && !IsXLHSInRHSPart);

  // Res holds <value of *X before the update, updated value>; the caller
  // (atomic capture) picks one of the two depending on prefix/postfix form.
  std::pair<Value *, Value *> Res;
  if (XElemTy->isIntegerTy() && !DoCmpExch) {
    // Integer update that maps directly onto a single atomicrmw instruction.
    Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
    // not needed except in case of postfix captures. Generate anyway for
    // consistency with the else part. Will be removed with any DCE pass.
    Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
  } else {
    // General case: atomic load of the old value followed by a
    // compare-exchange retry loop that applies the user update callback.
    unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace();
    // The cmpxchg works on the integer bit pattern of the element type.
    IntegerType *IntCastTy =
        IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
    Value *XBCast =
        Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
    LoadInst *OldVal =
        Builder.CreateLoad(IntCastTy, XBCast, X->getName() + ".atomic.load");
    OldVal->setAtomic(AO);
    // CurBB
    //  |     /---\
    // ContBB    |
    //  |     \---/
    // ExitBB
    BasicBlock *CurBB = Builder.GetInsertBlock();
    Instruction *CurBBTI = CurBB->getTerminator();
    // Ensure the block has a terminator so it can be split below.
    CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
    BasicBlock *ExitBB =
        CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit");
    BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(),
                                                X->getName() + ".atomic.cont");
    ContBB->getTerminator()->eraseFromParent();
    Builder.SetInsertPoint(ContBB);
    // PHI merges the initial load with the value observed by a failed
    // cmpxchg on each retry iteration.
    llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
    PHI->addIncoming(OldVal, CurBB);
    // Stack slot for the updated value; hoisted to the alloca insertion
    // point so it is not re-allocated on every loop iteration.
    AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
    NewAtomicAddr->setName(X->getName() + "x.new.val");
    NewAtomicAddr->moveBefore(AllocIP);
    IntegerType *NewAtomicCastTy =
        IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
    bool IsIntTy = XElemTy->isIntegerTy();
    Value *NewAtomicIntAddr =
        (IsIntTy)
            ? NewAtomicAddr
            : Builder.CreateBitCast(NewAtomicAddr,
                                    NewAtomicCastTy->getPointerTo(Addrspace));
    Value *OldExprVal = PHI;
    // Reinterpret the loaded integer bits as the element type expected by
    // the user-supplied update callback.
    if (!IsIntTy) {
      if (XElemTy->isFloatingPointTy()) {
        OldExprVal = Builder.CreateBitCast(PHI, XElemTy,
                                           X->getName() + ".atomic.fltCast");
      } else {
        OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy,
                                            X->getName() + ".atomic.ptrCast");
      }
    }

    Value *Upd = UpdateOp(OldExprVal, Builder);
    Builder.CreateStore(Upd, NewAtomicAddr);
    LoadInst *DesiredVal = Builder.CreateLoad(XElemTy, NewAtomicIntAddr);
    Value *XAddr =
        (IsIntTy)
            ? X
            : Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
    AtomicOrdering Failure =
        llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
    AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg(
        XAddr, OldExprVal, DesiredVal, llvm::MaybeAlign(), AO, Failure);
    Result->setVolatile(VolatileX);
    Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0);
    Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
    PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
    // Retry until the cmpxchg succeeds.
    Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);

    Res.first = OldExprVal;
    Res.second = Upd;

    // set Insertion point in exit block
    if (UnreachableInst *ExitTI =
            dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
      // The terminator is the unreachable created above; drop it and append
      // subsequent IR at the end of ExitBB.
      CurBBTI->eraseFromParent();
      Builder.SetInsertPoint(ExitBB);
    } else {
      // NOTE(review): ExitTI is necessarily nullptr in this branch (the
      // dyn_cast above failed), so this passes a null Instruction* to
      // SetInsertPoint. Presumably this was meant to be
      // Builder.SetInsertPoint(ExitBB->getTerminator()) — confirm and fix.
      Builder.SetInsertPoint(ExitTI);
    }
  }

  return Res;
}

/// Emit an "omp atomic capture" construct: atomically update X.Var via
/// emitAtomicUpdate (using \p RMWOp / \p UpdateOp, or a plain exchange when
/// \p UpdateExpr is false), then store the captured value into V.Var — the
/// pre-update value for a postfix capture, the updated value otherwise.
/// Returns the insertion point after the emitted IR.
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
    const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
    AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
    AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
    bool UpdateExpr, bool IsPostfixUpdate, bool IsXLHSInRHSPart) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  // Sanity-check operands; compiled away in release (asserts-off) builds.
  LLVM_DEBUG({
    Type *XTy = X.Var->getType();
    assert(XTy->isPointerTy() &&
           "OMP Atomic expects a pointer to target memory");
    Type *XElemTy = XTy->getPointerElementType();
    assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
            XElemTy->isPointerTy()) &&
           "OMP atomic capture expected a scalar type");
    assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
           "OpenMP atomic does not support LT or GT operations");
  });

  // If UpdateExpr is 'x' updated with some `expr` not based on 'x',
  // 'x' is simply atomically rewritten with 'expr'.
  AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
  std::pair<Value *, Value *> Result =
      emitAtomicUpdate(AllocIP, X.Var, Expr, AO, AtomicOp, UpdateOp,
                       X.IsVolatile, IsXLHSInRHSPart);

  // Result is <old value, updated value>; pick per capture form.
  Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
  Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);

  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
  return Builder.saveIP();
}

/// Materialize the map-name string constants in \p Names as a private,
/// constant global array of i8* named \p VarName and return the global.
GlobalVariable *
OpenMPIRBuilder::createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
                                       std::string VarName) {
  llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
      llvm::ArrayType::get(
          llvm::Type::getInt8Ty(M.getContext())->getPointerTo(), Names.size()),
      Names);
  auto *MapNamesArrayGlobal = new llvm::GlobalVariable(
      M, MapNamesArrayInit->getType(),
      /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapNamesArrayInit,
      VarName);
  return MapNamesArrayGlobal;
}

// Create all simple and struct types exposed by the runtime and remember
// the llvm::PointerTypes of them for easy access later.
2594 void OpenMPIRBuilder::initializeTypes(Module &M) { 2595 LLVMContext &Ctx = M.getContext(); 2596 StructType *T; 2597 #define OMP_TYPE(VarName, InitValue) VarName = InitValue; 2598 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \ 2599 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \ 2600 VarName##PtrTy = PointerType::getUnqual(VarName##Ty); 2601 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \ 2602 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \ 2603 VarName##Ptr = PointerType::getUnqual(VarName); 2604 #define OMP_STRUCT_TYPE(VarName, StructName, ...) \ 2605 T = StructType::getTypeByName(Ctx, StructName); \ 2606 if (!T) \ 2607 T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \ 2608 VarName = T; \ 2609 VarName##Ptr = PointerType::getUnqual(T); 2610 #include "llvm/Frontend/OpenMP/OMPKinds.def" 2611 } 2612 2613 void OpenMPIRBuilder::OutlineInfo::collectBlocks( 2614 SmallPtrSetImpl<BasicBlock *> &BlockSet, 2615 SmallVectorImpl<BasicBlock *> &BlockVector) { 2616 SmallVector<BasicBlock *, 32> Worklist; 2617 BlockSet.insert(EntryBB); 2618 BlockSet.insert(ExitBB); 2619 2620 Worklist.push_back(EntryBB); 2621 while (!Worklist.empty()) { 2622 BasicBlock *BB = Worklist.pop_back_val(); 2623 BlockVector.push_back(BB); 2624 for (BasicBlock *SuccBB : successors(BB)) 2625 if (BlockSet.insert(SuccBB).second) 2626 Worklist.push_back(SuccBB); 2627 } 2628 } 2629 2630 void CanonicalLoopInfo::collectControlBlocks( 2631 SmallVectorImpl<BasicBlock *> &BBs) { 2632 // We only count those BBs as control block for which we do not need to 2633 // reverse the CFG, i.e. not the loop body which can contain arbitrary control 2634 // flow. For consistency, this also means we do not add the Body block, which 2635 // is just the entry to the body code. 
2636 BBs.reserve(BBs.size() + 6); 2637 BBs.append({Preheader, Header, Cond, Latch, Exit, After}); 2638 } 2639 2640 void CanonicalLoopInfo::assertOK() const { 2641 #ifndef NDEBUG 2642 if (!IsValid) 2643 return; 2644 2645 // Verify standard control-flow we use for OpenMP loops. 2646 assert(Preheader); 2647 assert(isa<BranchInst>(Preheader->getTerminator()) && 2648 "Preheader must terminate with unconditional branch"); 2649 assert(Preheader->getSingleSuccessor() == Header && 2650 "Preheader must jump to header"); 2651 2652 assert(Header); 2653 assert(isa<BranchInst>(Header->getTerminator()) && 2654 "Header must terminate with unconditional branch"); 2655 assert(Header->getSingleSuccessor() == Cond && 2656 "Header must jump to exiting block"); 2657 2658 assert(Cond); 2659 assert(Cond->getSinglePredecessor() == Header && 2660 "Exiting block only reachable from header"); 2661 2662 assert(isa<BranchInst>(Cond->getTerminator()) && 2663 "Exiting block must terminate with conditional branch"); 2664 assert(size(successors(Cond)) == 2 && 2665 "Exiting block must have two successors"); 2666 assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body && 2667 "Exiting block's first successor jump to the body"); 2668 assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit && 2669 "Exiting block's second successor must exit the loop"); 2670 2671 assert(Body); 2672 assert(Body->getSinglePredecessor() == Cond && 2673 "Body only reachable from exiting block"); 2674 assert(!isa<PHINode>(Body->front())); 2675 2676 assert(Latch); 2677 assert(isa<BranchInst>(Latch->getTerminator()) && 2678 "Latch must terminate with unconditional branch"); 2679 assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header"); 2680 // TODO: To support simple redirecting of the end of the body code that has 2681 // multiple; introduce another auxiliary basic block like preheader and after. 
2682 assert(Latch->getSinglePredecessor() != nullptr); 2683 assert(!isa<PHINode>(Latch->front())); 2684 2685 assert(Exit); 2686 assert(isa<BranchInst>(Exit->getTerminator()) && 2687 "Exit block must terminate with unconditional branch"); 2688 assert(Exit->getSingleSuccessor() == After && 2689 "Exit block must jump to after block"); 2690 2691 assert(After); 2692 assert(After->getSinglePredecessor() == Exit && 2693 "After block only reachable from exit block"); 2694 assert(After->empty() || !isa<PHINode>(After->front())); 2695 2696 Instruction *IndVar = getIndVar(); 2697 assert(IndVar && "Canonical induction variable not found?"); 2698 assert(isa<IntegerType>(IndVar->getType()) && 2699 "Induction variable must be an integer"); 2700 assert(cast<PHINode>(IndVar)->getParent() == Header && 2701 "Induction variable must be a PHI in the loop header"); 2702 assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader); 2703 assert( 2704 cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero()); 2705 assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch); 2706 2707 auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1); 2708 assert(cast<Instruction>(NextIndVar)->getParent() == Latch); 2709 assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add); 2710 assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar); 2711 assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1)) 2712 ->isOne()); 2713 2714 Value *TripCount = getTripCount(); 2715 assert(TripCount && "Loop trip count not found?"); 2716 assert(IndVar->getType() == TripCount->getType() && 2717 "Trip count and induction variable must have the same type"); 2718 2719 auto *CmpI = cast<CmpInst>(&Cond->front()); 2720 assert(CmpI->getPredicate() == CmpInst::ICMP_ULT && 2721 "Exit condition must be a signed less-than comparison"); 2722 assert(CmpI->getOperand(0) == IndVar && 2723 "Exit condition must compare the induction variable"); 2724 
assert(CmpI->getOperand(1) == TripCount && 2725 "Exit condition must compare with the trip count"); 2726 #endif 2727 } 2728