//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs, which handles correct
/// codegen for captured expressions.
class OMPLexicalScope {
  CodeGenFunction::LexicalScope Scope;
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }

  class PostUpdateCleanup final : public EHScopeStack::Cleanup {
    const OMPExecutableDirective &S;

  public:
    PostUpdateCleanup(const OMPExecutableDirective &S) : S(S) {}

    void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
      if (!CGF.HaveInsertPoint())
        return;
      (void)S;
      // TODO: add cleanups for clauses that require post update.
    }
  };

public:
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : Scope(CGF, S.getSourceRange()) {
    emitPreInitStmt(CGF, S);
    CGF.EHStack.pushCleanup<PostUpdateCleanup>(NormalAndEHCleanup, S);
  }
};
} // namespace

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy())
      CapturedVars.push_back(
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal());
    else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), SourceLocation());
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references we need to return the address of the
  // reference instead of the reference to the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitScalarInit(RefVal, TmpLVal);
  }

  return TmpAddr;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  const RecordDecl *RD = S.getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  // Build the argument list.
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList Args;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. We can pass the VLA type sizes to the outlined
    // function in the same way.
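    //
    // For illustration (hypothetical names, not from this file): given
    // '#pragma omp parallel firstprivate(x)' with 'int x', the record field
    // for 'x' becomes an outlined-function parameter of uintptr type whose
    // bits carry the value of 'x'; the function body recovers a typed
    // address for it via castValueFromUintptr() above.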
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType())
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &getContext().Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &getContext().Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getContext().getVariableArrayDecayedType(ArgType);
    Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
                                             FD->getLocation(), II, ArgType));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo,
                                                    /*IsVariadic=*/false);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F = llvm::Function::Create(
      FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
      CapturedStmtInfo->getHelperName(), &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // Generate the function.
  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt]));
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal =
        MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
                       AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      LValue CastedArgLVal =
          MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
                                              Args[Cnt]->getName(), ArgLVal),
                         FD->getType(), AlignmentSource::Decl);
      auto *ExprArg =
          EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizeMap[VAT->getSizeExpr()] = ExprArg;
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        ArgAddr = EmitLoadOfReference(
            ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
      }
      setAddrOfLocalVar(
          Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      setAddrOfLocalVar(I->getCapturedVar(),
                        castValueFromUintptr(*this, FD->getType(),
                                             Args[Cnt]->getName(), ArgLVal,
                                             VarTy->isReferenceType()));
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue =
          EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
    }
    ++Cnt;
    ++I;
  }

  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());

  return F;
}

//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// \brief Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, const Expr *Init) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());

  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                         /*IsInitializer=*/false);
  }

  // Shift the address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element-by-element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so we have to remap the
            // destination and source variables to the corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
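    // 'Copy' is the assignment expression built by Sema in terms of the
    // pseudo source/destination variables (roughly 'DestVD = SrcVD', or an
    // 'operator=' call for class types); with both variables remapped above,
    // emitting it copies the entire object.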
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate ||
          (Lastprivates.count(OrigVD->getCanonicalDecl()) > 0);
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = OrigVD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code
        // with TLS support, the address is passed from the master as a field
        // in the captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If
          // it is, there is no need to copy the data.
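          //
          // Sketch of the emitted check (illustrative):
          //   if ((uintptr_t)&master_var != (uintptr_t)&tls_var) {
          //     tls_var = master_var; // ... one copy per copyin variable ...
          //   }
          //   // fall through to copyin.not.master.end
          // On the master thread the two addresses compare equal, so the
          // copies are skipped.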
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the
      // end of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (IInit) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [&]() -> Address {
                // Emit private VarDecl with copy init.
                EmitDecl(*VD);
                return GetAddrOfLocalVar(VD);
              });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D = cast<DeclRefExpr>(*IC)->getDecl()->getCanonicalDecl();
      LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(UpExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLV.getPointer()->getType();
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLV.getAlignment());
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (auto *PtrTy = BaseTy->getAs<PointerType>())
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    else {
      BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
                                             BaseTy->castAs<ReferenceType>());
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      Address(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
          BaseLV.getAlignment()),
      BaseLV.getType(), BaseLV.getAlignmentSource());
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    auto IPriv = C->privates().begin();
    for (auto IRef : C->varlists()) {
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
          Base = TempOASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
        auto OASELValueUB =
            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue =
            loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
                        OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
          return OASELValueLB.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
                     OASELValueUB, OriginalBaseLValue]() -> Address {
              // Emit VarDecl with copy init for arrays.
              // Get the address of the original variable captured in the
              // current captured region.
              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
                                                 OASELValueLB.getPointer());
              Size = Builder.CreateNUWAdd(
                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(Size));
              EmitVariablyModifiedType(PrivateVD->getType());
              auto Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              auto *Init = PrivateVD->getInit();
              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init);
              EmitAutoVarCleanups(Emission);
              // Emit private VarDecl with reduction init.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   OASELValueLB.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(),
                                OASELValueLB.getType(), OriginalBaseLValue,
                                Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto ASELValue = EmitLValue(ASE);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue = loadToBegin(
            *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
          return ASELValue.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
                     OriginalBaseLValue]() -> Address {
              // Emit private VarDecl with reduction init.
              EmitDecl(*PrivateVD);
              auto Addr = GetAddrOfLocalVar(PrivateVD);
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   ASELValue.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
                                OriginalBaseLValue, Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
          return Builder.CreateElementBitCast(
              GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
              "rhs.begin");
        });
      } else {
        auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        if (getContext().getAsArrayType(Type)) {
          // Store the address of the original variable associated with the
          // LHS implicit variable.
          DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          IRef->getType(), VK_LValue, IRef->getExprLoc());
          Address OriginalAddr = EmitLValue(&DRE).getAddress();
          PrivateScope.addPrivate(LHSVD, [this, OriginalAddr,
                                          LHSVD]() -> Address {
            return Builder.CreateElementBitCast(
                OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
          });
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            if (Type->isVariablyModifiedType()) {
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(
                      getTypeSize(OrigVD->getType().getNonReferenceType())));
              EmitVariablyModifiedType(Type);
            }
            auto Emission = EmitAutoVarAlloca(*PrivateVD);
            auto Addr = Emission.getAllocatedAddress();
            auto *Init = PrivateVD->getInit();
            EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init);
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
            return Builder.CreateElementBitCast(
                GetAddrOfLocalVar(PrivateVD),
                ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
          });
        } else {
          // Store the address of the original variable associated with the
          // LHS implicit variable.
          PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> Address {
            DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                            CapturedStmtInfo->lookup(OrigVD) != nullptr,
                            IRef->getType(), VK_LValue, IRef->getExprLoc());
            return EmitLValue(&DRE).getAddress();
          });
          // Emit reduction copy.
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> Address {
                // Emit private VarDecl with reduction init.
                EmitDecl(*PrivateVD);
                return GetAddrOfLocalVar(PrivateVD);
              });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
            return GetAddrOfLocalVar(PrivateVD);
          });
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    // Emit nowait reduction if the nowait clause is present or the directive
    // is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        D.getSingleClause<OMPNowaitClause>() ||
            isOpenMPParallelDirective(D.getDirectiveKind()) ||
            D.getDirectiveKind() == OMPD_simd,
        D.getDirectiveKind() == OMPD_simd);
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (auto *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit a conditional
          // block if it was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                           const OMPExecutableDirective &S,
                                           OpenMPDirectiveKind InnermostKind,
                                           const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  auto OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOrTeamsOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  OMPLexicalScope Scope(*this, S);
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize the threads and avoid data
      // races when propagating the master thread's values of threadprivate
      // variables to the local instances of those variables in all other
      // implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counter values on the current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto U : C->updates()) {
      EmitIgnoredExpr(U);
    }
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return;
  // Emit inits for the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto Init : C->inits()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      auto *OrigVD = cast<VarDecl>(
          cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      VD->getInit()->getType(), VK_LValue,
                      VD->getInit()->getExprLoc());
      AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
      EmitExprAsInit(&DRE, VD,
                     MakeAddrLValue(Emission.getAllocatedAddress(),
                                    VD->getType()),
                     /*capturedByInit=*/false);
      EmitAutoVarCleanups(Emission);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
}

static void emitLinearClauseFinal(CodeGenFunction &CGF,
                                  const OMPLoopDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  // Emit the final values of the linear variables.
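  //
  // For example (illustrative, assuming the usual form of the final
  // expressions built by Sema): for '#pragma omp simd linear(i : 2)' the
  // final expression emitted here is roughly 'i = i.start + n.iters * 2',
  // evaluated with the original 'i' privatized to its captured address.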
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto F : C->finals()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = CGF.EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(CGF);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      CGF.EmitIgnoredExpr(F);
      ++IC;
    }
  }
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

static void emitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters,
                                    ArrayRef<Expr *> PrivateCounters) {
  if (!CGF.HaveInsertPoint())
    return;
  auto I = PrivateCounters.begin();
  for (auto *E : Counters) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    Address Addr = Address::invalid();
    (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
      // Emit var without initialization.
      auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD);
      CGF.EmitAutoVarCleanups(VarEmission);
      Addr = VarEmission.getAllocatedAddress();
      return Addr;
    });
    (void)LoopScope.addPrivate(VD, [&]() -> Address { return Addr; });
    ++I;
  }
}

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    emitPrivateLoopCounters(CGF, PreCondScope, S.counters(),
                            S.private_counters());
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (auto I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Check that the loop is executed at least once.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

static void
emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
                      CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
        // Emit private VarDecl with copy init.
        CGF.EmitVarDecl(*PrivateVD);
        return CGF.GetAddrOfLocalVar(PrivateVD);
      });
      assert(IsRegistered && "linear var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++CurPrivate;
    }
  }
}

static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(false);
  }
}

void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/lastprivate.
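  //
  // For example (illustrative): '#pragma omp simd simdlen(8)' sets the
  // vectorize width to 8 and, absent a 'safelen' clause (and for
  // non-monotonic loops), marks the loop parallel; '#pragma omp simd
  // safelen(16)' also sets the width but keeps the loop non-parallel, since
  // iterations up to 16 apart may carry dependences.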
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}

void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return;
  auto IC = D.counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) {
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
  }
  emitLinearClauseFinal(*this, D);
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    CGF.EmitOMPLinearClauseInit(S);
    bool HasLastprivateClause;
    {
      OMPPrivateScope LoopScope(CGF);
      emitPrivateLoopCounters(CGF, LoopScope, S.counters(),
                              S.private_counters());
      emitPrivateLinearVars(CGF, S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      // Emit final copy of the lastprivate variables at the end of loops.
      if (HasLastprivateClause) {
        CGF.EmitOMPLastprivateClauseFinal(S);
      }
      CGF.EmitOMPReductionClauseFinal(S);
      emitPostUpdateForReductionClause(
          CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    }
    CGF.EmitOMPSimdFinal(S);
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond());
  } else {
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
                                 IL, LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated
  // the new LB for the loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without an ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();
  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                   [&S, LoopExit](CodeGenFunction &CGF) {
                     CGF.EmitOMPLoopBody(S, LoopExit);
                     CGF.EmitStopPoint(&S);
                   },
                   [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
                     if (Ordered) {
                       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
                           CGF, Loc, IVSize, IVSigned);
                     }
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
1489 EmitIgnoredExpr(S.getNextLowerBound()); 1490 EmitIgnoredExpr(S.getNextUpperBound()); 1491 } 1492 1493 EmitBranch(CondBlock); 1494 LoopStack.pop(); 1495 // Emit the fall-through block. 1496 EmitBlock(LoopExit.getBlock()); 1497 1498 // Tell the runtime we are done. 1499 if (!DynamicOrOrdered) 1500 RT.emitForStaticFinish(*this, S.getLocEnd()); 1501 1502 } 1503 1504 void CodeGenFunction::EmitOMPForOuterLoop( 1505 OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic, 1506 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1507 Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { 1508 auto &RT = CGM.getOpenMPRuntime(); 1509 1510 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 1511 const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind); 1512 1513 assert((Ordered || 1514 !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) && 1515 "static non-chunked schedule does not need outer loop"); 1516 1517 // Emit outer loop. 1518 // 1519 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1520 // When schedule(dynamic,chunk_size) is specified, the iterations are 1521 // distributed to threads in the team in chunks as the threads request them. 1522 // Each thread executes a chunk of iterations, then requests another chunk, 1523 // until no chunks remain to be distributed. Each chunk contains chunk_size 1524 // iterations, except for the last chunk to be distributed, which may have 1525 // fewer iterations. When no chunk_size is specified, it defaults to 1. 1526 // 1527 // When schedule(guided,chunk_size) is specified, the iterations are assigned 1528 // to threads in the team in chunks as the executing threads request them. 1529 // Each thread executes a chunk of iterations, then requests another chunk, 1530 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 1531 // each chunk is proportional to the number of unassigned iterations divided 1532 // by the number of threads in the team, decreasing to 1. For a chunk_size 1533 // with value k (greater than 1), the size of each chunk is determined in the 1534 // same way, with the restriction that the chunks do not contain fewer than k 1535 // iterations (except for the last chunk to be assigned, which may have fewer 1536 // than k iterations). 1537 // 1538 // When schedule(auto) is specified, the decision regarding scheduling is 1539 // delegated to the compiler and/or runtime system. The programmer gives the 1540 // implementation the freedom to choose any possible mapping of iterations to 1541 // threads in the team. 1542 // 1543 // When schedule(runtime) is specified, the decision regarding scheduling is 1544 // deferred until run time, and the schedule and chunk size are taken from the 1545 // run-sched-var ICV. If the ICV is set to auto, the schedule is 1546 // implementation defined 1547 // 1548 // while(__kmpc_dispatch_next(&LB, &UB)) { 1549 // idx = LB; 1550 // while (idx <= UB) { BODY; ++idx; 1551 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. 1552 // } // inner loop 1553 // } 1554 // 1555 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1556 // When schedule(static, chunk_size) is specified, iterations are divided into 1557 // chunks of size chunk_size, and the chunks are assigned to the threads in 1558 // the team in a round-robin fashion in the order of the thread number. 
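// For example, with schedule(static, 2), 4 threads, and 16 iterations,
// thread 0 executes iterations {0,1} and {8,9}, thread 1 executes {2,3}
// and {10,11}, and so on, wrapping around in thread order.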
1559 //
1560 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1561 // while (idx <= UB) { BODY; ++idx; } // inner loop
1562 // LB = LB + ST;
1563 // UB = UB + ST;
1564 // }
1565 //
1566
1567 const Expr *IVExpr = S.getIterationVariable();
1568 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1569 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1570
1571 if (DynamicOrOrdered) {
1572 llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
1573 RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind,
1574 IVSize, IVSigned, Ordered, UBVal, Chunk);
1575 } else {
1576 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
1577 Ordered, IL, LB, UB, ST, Chunk);
1578 }
1579
1580 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB,
1581 ST, IL, Chunk);
1582 }
1583
1584 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1585 OpenMPDistScheduleClauseKind ScheduleKind,
1586 const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
1587 Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
1588
1589 auto &RT = CGM.getOpenMPRuntime();
1590
1591 // Emit outer loop.
1592 // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
1593 // dynamic.
1594 //
1595
1596 const Expr *IVExpr = S.getIterationVariable();
1597 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1598 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1599
1600 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
1601 IVSize, IVSigned, /* Ordered = */ false,
1602 IL, LB, UB, ST, Chunk);
1603
1604 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false,
1605 S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk);
1606 }
1607
1608 /// \brief Emit a helper variable and return corresponding lvalue.
1609 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1610 const DeclRefExpr *Helper) {
1611 auto VDecl = cast<VarDecl>(Helper->getDecl());
1612 CGF.EmitVarDecl(*VDecl);
1613 return CGF.EmitLValue(Helper);
1614 }
1615
1616 namespace {
1617 struct ScheduleKindModifiersTy {
1618 OpenMPScheduleClauseKind Kind;
1619 OpenMPScheduleClauseModifier M1;
1620 OpenMPScheduleClauseModifier M2;
1621 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
1622 OpenMPScheduleClauseModifier M1,
1623 OpenMPScheduleClauseModifier M2)
1624 : Kind(Kind), M1(M1), M2(M2) {}
1625 };
1626 } // namespace
1627
1628 bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
1629 // Emit the loop iteration variable.
1630 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
1631 auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
1632 EmitVarDecl(*IVDecl);
1633
1634 // Emit the iterations count variable.
1635 // If it is not a variable, Sema decided to calculate iterations count on each
1636 // iteration (e.g., it is foldable into a constant).
1637 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1638 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1639 // Emit calculation of the iterations count.
1640 EmitIgnoredExpr(S.getCalcLastIteration());
1641 }
1642
1643 auto &RT = CGM.getOpenMPRuntime();
1644
1645 bool HasLastprivateClause;
1646 // Check pre-condition.
1647 {
1648 // Skip the entire loop if we don't meet the precondition.
1649 // If the condition constant folds and can be elided, avoid emitting the
1650 // whole loop.
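// E.g. a source loop 'for (int i = 0; i < 0; ++i)' has a precondition that
// folds to false, so neither the runtime init/fini calls nor any loop
// blocks are emitted at all.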
1651 bool CondConstant; 1652 llvm::BasicBlock *ContBlock = nullptr; 1653 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1654 if (!CondConstant) 1655 return false; 1656 } else { 1657 auto *ThenBlock = createBasicBlock("omp.precond.then"); 1658 ContBlock = createBasicBlock("omp.precond.end"); 1659 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 1660 getProfileCount(&S)); 1661 EmitBlock(ThenBlock); 1662 incrementProfileCounter(&S); 1663 } 1664 1665 emitAlignedClause(*this, S); 1666 EmitOMPLinearClauseInit(S); 1667 // Emit 'then' code. 1668 { 1669 // Emit helper vars inits. 1670 LValue LB = 1671 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); 1672 LValue UB = 1673 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); 1674 LValue ST = 1675 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 1676 LValue IL = 1677 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 1678 1679 OMPPrivateScope LoopScope(*this); 1680 if (EmitOMPFirstprivateClause(S, LoopScope)) { 1681 // Emit implicit barrier to synchronize threads and avoid data races on 1682 // initialization of firstprivate variables and post-update of 1683 // lastprivate variables. 1684 CGM.getOpenMPRuntime().emitBarrierCall( 1685 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 1686 /*ForceSimpleCall=*/true); 1687 } 1688 EmitOMPPrivateClause(S, LoopScope); 1689 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 1690 EmitOMPReductionClauseInit(S, LoopScope); 1691 emitPrivateLoopCounters(*this, LoopScope, S.counters(), 1692 S.private_counters()); 1693 emitPrivateLinearVars(*this, S, LoopScope); 1694 (void)LoopScope.Privatize(); 1695 1696 // Detect the loop schedule kind and chunk. 1697 llvm::Value *Chunk = nullptr; 1698 OpenMPScheduleClauseKind ScheduleKind = OMPC_SCHEDULE_unknown; 1699 OpenMPScheduleClauseModifier M1 = OMPC_SCHEDULE_MODIFIER_unknown; 1700 OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown; 1701 if (auto *C = S.getSingleClause<OMPScheduleClause>()) { 1702 ScheduleKind = C->getScheduleKind(); 1703 M1 = C->getFirstScheduleModifier(); 1704 M2 = C->getSecondScheduleModifier(); 1705 if (const auto *Ch = C->getChunkSize()) { 1706 Chunk = EmitScalarExpr(Ch); 1707 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 1708 S.getIterationVariable()->getType(), 1709 S.getLocStart()); 1710 } 1711 } 1712 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1713 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1714 const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr; 1715 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 1716 // If the static schedule kind is specified or if the ordered clause is 1717 // specified, and if no monotonic modifier is specified, the effect will 1718 // be as if the monotonic modifier was specified. 1719 if (RT.isStaticNonchunked(ScheduleKind, 1720 /* Chunked */ Chunk != nullptr) && 1721 !Ordered) { 1722 if (isOpenMPSimdDirective(S.getDirectiveKind())) 1723 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 1724 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1725 // When no chunk_size is specified, the iteration space is divided into 1726 // chunks that are approximately equal in size, and at most one chunk is 1727 // distributed to each thread. Note that the size of the chunks is 1728 // unspecified in this case. 
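// The sequence emitted below is roughly (a sketch; the exact runtime entry
// point depends on the induction variable's width and signedness):
//   __kmpc_for_static_init_4(&loc, tid, kmp_sch_static,
//                            &IL, &LB, &UB, &ST, /*incr=*/1, /*chunk=*/1);
//   UB = min(UB, GlobalUB);
//   for (IV = LB; IV <= UB; ++IV) BODY;
//   __kmpc_for_static_fini(&loc, tid);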
1729 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, 1730 IVSize, IVSigned, Ordered, 1731 IL.getAddress(), LB.getAddress(), 1732 UB.getAddress(), ST.getAddress()); 1733 auto LoopExit = 1734 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 1735 // UB = min(UB, GlobalUB); 1736 EmitIgnoredExpr(S.getEnsureUpperBound()); 1737 // IV = LB; 1738 EmitIgnoredExpr(S.getInit()); 1739 // while (idx <= UB) { BODY; ++idx; } 1740 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1741 S.getInc(), 1742 [&S, LoopExit](CodeGenFunction &CGF) { 1743 CGF.EmitOMPLoopBody(S, LoopExit); 1744 CGF.EmitStopPoint(&S); 1745 }, 1746 [](CodeGenFunction &) {}); 1747 EmitBlock(LoopExit.getBlock()); 1748 // Tell the runtime we are done. 1749 RT.emitForStaticFinish(*this, S.getLocStart()); 1750 } else { 1751 const bool IsMonotonic = Ordered || 1752 ScheduleKind == OMPC_SCHEDULE_static || 1753 ScheduleKind == OMPC_SCHEDULE_unknown || 1754 M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 1755 M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 1756 // Emit the outer loop, which requests its work chunk [LB..UB] from 1757 // runtime and runs the inner loop to process it. 1758 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 1759 LB.getAddress(), UB.getAddress(), ST.getAddress(), 1760 IL.getAddress(), Chunk); 1761 } 1762 EmitOMPReductionClauseFinal(S); 1763 // Emit post-update of the reduction variables if IsLastIter != 0. 1764 emitPostUpdateForReductionClause( 1765 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 1766 return CGF.Builder.CreateIsNotNull( 1767 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 1768 }); 1769 // Emit final copy of the lastprivate variables if IsLastIter != 0. 1770 if (HasLastprivateClause) 1771 EmitOMPLastprivateClauseFinal( 1772 S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 1773 } 1774 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 1775 EmitOMPSimdFinal(S); 1776 } 1777 // We're now done with the loop, so jump to the continuation block. 1778 if (ContBlock) { 1779 EmitBranch(ContBlock); 1780 EmitBlock(ContBlock, true); 1781 } 1782 } 1783 return HasLastprivateClause; 1784 } 1785 1786 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 1787 bool HasLastprivates = false; 1788 { 1789 OMPLexicalScope Scope(*this, S); 1790 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { 1791 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 1792 }; 1793 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 1794 S.hasCancel()); 1795 } 1796 1797 // Emit an implicit barrier at the end. 1798 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 1799 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 1800 } 1801 } 1802 1803 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 1804 bool HasLastprivates = false; 1805 { 1806 OMPLexicalScope Scope(*this, S); 1807 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { 1808 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 1809 }; 1810 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1811 } 1812 1813 // Emit an implicit barrier at the end. 
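// Note that 'nowait' only removes this barrier when no lastprivate clause
// is present; e.g. '#pragma omp for simd nowait lastprivate(x)' still
// synchronizes so every thread observes the final value of 'x'.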
1814 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 1815 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 1816 } 1817 } 1818 1819 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 1820 const Twine &Name, 1821 llvm::Value *Init = nullptr) { 1822 auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 1823 if (Init) 1824 CGF.EmitScalarInit(Init, LVal); 1825 return LVal; 1826 } 1827 1828 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 1829 auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); 1830 auto *CS = dyn_cast<CompoundStmt>(Stmt); 1831 bool HasLastprivates = false; 1832 auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF) { 1833 auto &C = CGF.CGM.getContext(); 1834 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1835 // Emit helper vars inits. 1836 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 1837 CGF.Builder.getInt32(0)); 1838 auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1) 1839 : CGF.Builder.getInt32(0); 1840 LValue UB = 1841 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 1842 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 1843 CGF.Builder.getInt32(1)); 1844 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 1845 CGF.Builder.getInt32(0)); 1846 // Loop counter. 1847 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 1848 OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 1849 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 1850 OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 1851 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 1852 // Generate condition for loop. 1853 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 1854 OK_Ordinary, S.getLocStart(), 1855 /*fpContractable=*/false); 1856 // Increment for loop counter. 1857 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 1858 S.getLocStart()); 1859 auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) { 1860 // Iterate through all sections and emit a switch construct: 1861 // switch (IV) { 1862 // case 0: 1863 // <SectionStmt[0]>; 1864 // break; 1865 // ... 1866 // case <NumSection> - 1: 1867 // <SectionStmt[<NumSection> - 1]>; 1868 // break; 1869 // } 1870 // .omp.sections.exit: 1871 auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 1872 auto *SwitchStmt = CGF.Builder.CreateSwitch( 1873 CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, 1874 CS == nullptr ? 
1 : CS->size()); 1875 if (CS) { 1876 unsigned CaseNumber = 0; 1877 for (auto *SubStmt : CS->children()) { 1878 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 1879 CGF.EmitBlock(CaseBB); 1880 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 1881 CGF.EmitStmt(SubStmt); 1882 CGF.EmitBranch(ExitBB); 1883 ++CaseNumber; 1884 } 1885 } else { 1886 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 1887 CGF.EmitBlock(CaseBB); 1888 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 1889 CGF.EmitStmt(Stmt); 1890 CGF.EmitBranch(ExitBB); 1891 } 1892 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 1893 }; 1894 1895 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 1896 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 1897 // Emit implicit barrier to synchronize threads and avoid data races on 1898 // initialization of firstprivate variables and post-update of lastprivate 1899 // variables. 1900 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 1901 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 1902 /*ForceSimpleCall=*/true); 1903 } 1904 CGF.EmitOMPPrivateClause(S, LoopScope); 1905 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1906 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1907 (void)LoopScope.Privatize(); 1908 1909 // Emit static non-chunked loop. 1910 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 1911 CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32, 1912 /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(), 1913 UB.getAddress(), ST.getAddress()); 1914 // UB = min(UB, GlobalUB); 1915 auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); 1916 auto *MinUBGlobalUB = CGF.Builder.CreateSelect( 1917 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 1918 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 1919 // IV = LB; 1920 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); 1921 // while (idx <= UB) { BODY; ++idx; } 1922 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, 1923 [](CodeGenFunction &) {}); 1924 // Tell the runtime we are done. 1925 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart()); 1926 CGF.EmitOMPReductionClauseFinal(S); 1927 // Emit post-update of the reduction variables if IsLastIter != 0. 1928 emitPostUpdateForReductionClause( 1929 CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 1930 return CGF.Builder.CreateIsNotNull( 1931 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 1932 }); 1933 1934 // Emit final copy of the lastprivate variables if IsLastIter != 0. 1935 if (HasLastprivates) 1936 CGF.EmitOMPLastprivateClauseFinal( 1937 S, CGF.Builder.CreateIsNotNull( 1938 CGF.EmitLoadOfScalar(IL, S.getLocStart()))); 1939 }; 1940 1941 bool HasCancel = false; 1942 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 1943 HasCancel = OSD->hasCancel(); 1944 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 1945 HasCancel = OPSD->hasCancel(); 1946 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 1947 HasCancel); 1948 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 1949 // clause. Otherwise the barrier will be generated by the codegen for the 1950 // directive. 1951 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 1952 // Emit implicit barrier to synchronize threads and avoid data races on 1953 // initialization of firstprivate variables. 
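// (With 'nowait' the sections construct itself emits no closing barrier,
// so this call is the only point guaranteeing that e.g. a lastprivate
// value written by the thread that ran the last section is visible to
// the other threads.)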
1954 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 1955 OMPD_unknown); 1956 } 1957 } 1958 1959 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 1960 { 1961 OMPLexicalScope Scope(*this, S); 1962 EmitSections(S); 1963 } 1964 // Emit an implicit barrier at the end. 1965 if (!S.getSingleClause<OMPNowaitClause>()) { 1966 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 1967 OMPD_sections); 1968 } 1969 } 1970 1971 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 1972 OMPLexicalScope Scope(*this, S); 1973 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1974 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1975 }; 1976 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 1977 S.hasCancel()); 1978 } 1979 1980 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 1981 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 1982 llvm::SmallVector<const Expr *, 8> DestExprs; 1983 llvm::SmallVector<const Expr *, 8> SrcExprs; 1984 llvm::SmallVector<const Expr *, 8> AssignmentOps; 1985 // Check if there are any 'copyprivate' clauses associated with this 1986 // 'single' construct. 1987 // Build a list of copyprivate variables along with helper expressions 1988 // (<source>, <destination>, <destination>=<source> expressions) 1989 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 1990 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 1991 DestExprs.append(C->destination_exprs().begin(), 1992 C->destination_exprs().end()); 1993 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 1994 AssignmentOps.append(C->assignment_ops().begin(), 1995 C->assignment_ops().end()); 1996 } 1997 { 1998 OMPLexicalScope Scope(*this, S); 1999 // Emit code for 'single' region along with 'copyprivate' clauses 2000 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2001 CodeGenFunction::OMPPrivateScope SingleScope(CGF); 2002 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 2003 CGF.EmitOMPPrivateClause(S, SingleScope); 2004 (void)SingleScope.Privatize(); 2005 CGF.EmitStmt( 2006 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2007 }; 2008 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 2009 CopyprivateVars, DestExprs, 2010 SrcExprs, AssignmentOps); 2011 } 2012 // Emit an implicit barrier at the end (to avoid data race on firstprivate 2013 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 2014 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 2015 CGM.getOpenMPRuntime().emitBarrierCall( 2016 *this, S.getLocStart(), 2017 S.getSingleClause<OMPNowaitClause>() ? 
OMPD_unknown : OMPD_single); 2018 } 2019 } 2020 2021 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 2022 OMPLexicalScope Scope(*this, S); 2023 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2024 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2025 }; 2026 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 2027 } 2028 2029 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 2030 OMPLexicalScope Scope(*this, S); 2031 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2032 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2033 }; 2034 Expr *Hint = nullptr; 2035 if (auto *HintClause = S.getSingleClause<OMPHintClause>()) 2036 Hint = HintClause->getHint(); 2037 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 2038 S.getDirectiveName().getAsString(), 2039 CodeGen, S.getLocStart(), Hint); 2040 } 2041 2042 void CodeGenFunction::EmitOMPParallelForDirective( 2043 const OMPParallelForDirective &S) { 2044 // Emit directive as a combined directive that consists of two implicit 2045 // directives: 'parallel' with 'for' directive. 2046 OMPLexicalScope Scope(*this, S); 2047 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2048 CGF.EmitOMPWorksharingLoop(S); 2049 }; 2050 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); 2051 } 2052 2053 void CodeGenFunction::EmitOMPParallelForSimdDirective( 2054 const OMPParallelForSimdDirective &S) { 2055 // Emit directive as a combined directive that consists of two implicit 2056 // directives: 'parallel' with 'for' directive. 2057 OMPLexicalScope Scope(*this, S); 2058 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2059 CGF.EmitOMPWorksharingLoop(S); 2060 }; 2061 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); 2062 } 2063 2064 void CodeGenFunction::EmitOMPParallelSectionsDirective( 2065 const OMPParallelSectionsDirective &S) { 2066 // Emit directive as a combined directive that consists of two implicit 2067 // directives: 'parallel' with 'sections' directive. 2068 OMPLexicalScope Scope(*this, S); 2069 auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitSections(S); }; 2070 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); 2071 } 2072 2073 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 2074 // Emit outlined function for task construct. 2075 OMPLexicalScope Scope(*this, S); 2076 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2077 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 2078 auto *I = CS->getCapturedDecl()->param_begin(); 2079 auto *PartId = std::next(I); 2080 // The first function argument for tasks is a thread id, the second one is a 2081 // part id (0 for tied tasks, >=0 for untied task). 2082 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 2083 // Get list of private variables. 2084 llvm::SmallVector<const Expr *, 8> PrivateVars; 2085 llvm::SmallVector<const Expr *, 8> PrivateCopies; 2086 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 2087 auto IRef = C->varlist_begin(); 2088 for (auto *IInit : C->private_copies()) { 2089 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2090 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2091 PrivateVars.push_back(*IRef); 2092 PrivateCopies.push_back(IInit); 2093 } 2094 ++IRef; 2095 } 2096 } 2097 EmittedAsPrivate.clear(); 2098 // Get list of firstprivate variables. 
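// E.g. for '#pragma omp task private(a) firstprivate(b)', 'a' was
// collected into PrivateVars above, while 'b' is collected here together
// with its private copy and the expression initializing that copy from
// the captured 'b'.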
2099 llvm::SmallVector<const Expr *, 8> FirstprivateVars; 2100 llvm::SmallVector<const Expr *, 8> FirstprivateCopies; 2101 llvm::SmallVector<const Expr *, 8> FirstprivateInits; 2102 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 2103 auto IRef = C->varlist_begin(); 2104 auto IElemInitRef = C->inits().begin(); 2105 for (auto *IInit : C->private_copies()) { 2106 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2107 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2108 FirstprivateVars.push_back(*IRef); 2109 FirstprivateCopies.push_back(IInit); 2110 FirstprivateInits.push_back(*IElemInitRef); 2111 } 2112 ++IRef; 2113 ++IElemInitRef; 2114 } 2115 } 2116 // Build list of dependences. 2117 llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8> 2118 Dependences; 2119 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { 2120 for (auto *IRef : C->varlists()) { 2121 Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); 2122 } 2123 } 2124 auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars]( 2125 CodeGenFunction &CGF) { 2126 // Set proper addresses for generated private copies. 2127 auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2128 OMPPrivateScope Scope(CGF); 2129 if (!PrivateVars.empty() || !FirstprivateVars.empty()) { 2130 auto *CopyFn = CGF.Builder.CreateLoad( 2131 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); 2132 auto *PrivatesPtr = CGF.Builder.CreateLoad( 2133 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); 2134 // Map privates. 2135 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> 2136 PrivatePtrs; 2137 llvm::SmallVector<llvm::Value *, 16> CallArgs; 2138 CallArgs.push_back(PrivatesPtr); 2139 for (auto *E : PrivateVars) { 2140 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2141 Address PrivatePtr = 2142 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); 2143 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2144 CallArgs.push_back(PrivatePtr.getPointer()); 2145 } 2146 for (auto *E : FirstprivateVars) { 2147 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2148 Address PrivatePtr = 2149 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); 2150 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2151 CallArgs.push_back(PrivatePtr.getPointer()); 2152 } 2153 CGF.EmitRuntimeCall(CopyFn, CallArgs); 2154 for (auto &&Pair : PrivatePtrs) { 2155 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 2156 CGF.getContext().getDeclAlign(Pair.first)); 2157 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 2158 } 2159 } 2160 (void)Scope.Privatize(); 2161 if (*PartId) { 2162 // TODO: emit code for untied tasks. 2163 } 2164 CGF.EmitStmt(CS->getCapturedStmt()); 2165 }; 2166 auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 2167 S, *I, OMPD_task, CodeGen); 2168 // Check if we should emit tied or untied task. 2169 bool Tied = !S.getSingleClause<OMPUntiedClause>(); 2170 // Check if the task is final 2171 llvm::PointerIntPair<llvm::Value *, 1, bool> Final; 2172 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 2173 // If the condition constant folds and can be elided, try to avoid emitting 2174 // the condition and the dead arm of the if/else. 
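// E.g. 'final(1)' folds to a constant 'true' flag and emits no branch,
// while 'final(n > 10)' is evaluated below as a runtime boolean.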
2175 auto *Cond = Clause->getCondition(); 2176 bool CondConstant; 2177 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 2178 Final.setInt(CondConstant); 2179 else 2180 Final.setPointer(EvaluateExprAsBool(Cond)); 2181 } else { 2182 // By default the task is not final. 2183 Final.setInt(/*IntVal=*/false); 2184 } 2185 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 2186 const Expr *IfCond = nullptr; 2187 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2188 if (C->getNameModifier() == OMPD_unknown || 2189 C->getNameModifier() == OMPD_task) { 2190 IfCond = C->getCondition(); 2191 break; 2192 } 2193 } 2194 CGM.getOpenMPRuntime().emitTaskCall( 2195 *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy, 2196 CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars, 2197 FirstprivateCopies, FirstprivateInits, Dependences); 2198 } 2199 2200 void CodeGenFunction::EmitOMPTaskyieldDirective( 2201 const OMPTaskyieldDirective &S) { 2202 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); 2203 } 2204 2205 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 2206 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); 2207 } 2208 2209 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 2210 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart()); 2211 } 2212 2213 void CodeGenFunction::EmitOMPTaskgroupDirective( 2214 const OMPTaskgroupDirective &S) { 2215 OMPLexicalScope Scope(*this, S); 2216 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2217 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2218 }; 2219 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); 2220 } 2221 2222 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 2223 CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { 2224 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) { 2225 return llvm::makeArrayRef(FlushClause->varlist_begin(), 2226 FlushClause->varlist_end()); 2227 } 2228 return llvm::None; 2229 }(), S.getLocStart()); 2230 } 2231 2232 void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { 2233 // Emit the loop iteration variable. 2234 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2235 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2236 EmitVarDecl(*IVDecl); 2237 2238 // Emit the iterations count variable. 2239 // If it is not a variable, Sema decided to calculate iterations count on each 2240 // iteration (e.g., it is foldable into a constant). 2241 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2242 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2243 // Emit calculation of the iterations count. 2244 EmitIgnoredExpr(S.getCalcLastIteration()); 2245 } 2246 2247 auto &RT = CGM.getOpenMPRuntime(); 2248 2249 // Check pre-condition. 2250 { 2251 // Skip the entire loop if we don't meet the precondition. 2252 // If the condition constant folds and can be elided, avoid emitting the 2253 // whole loop. 
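// As in the worksharing loop above, e.g. '#pragma omp distribute' over
// 'for (int i = 10; i < 10; ++i)' folds the precondition to false and
// elides the distribute init/fini calls entirely.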
2254 bool CondConstant; 2255 llvm::BasicBlock *ContBlock = nullptr; 2256 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2257 if (!CondConstant) 2258 return; 2259 } else { 2260 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2261 ContBlock = createBasicBlock("omp.precond.end"); 2262 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2263 getProfileCount(&S)); 2264 EmitBlock(ThenBlock); 2265 incrementProfileCounter(&S); 2266 } 2267 2268 // Emit 'then' code. 2269 { 2270 // Emit helper vars inits. 2271 LValue LB = 2272 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); 2273 LValue UB = 2274 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); 2275 LValue ST = 2276 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2277 LValue IL = 2278 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2279 2280 OMPPrivateScope LoopScope(*this); 2281 emitPrivateLoopCounters(*this, LoopScope, S.counters(), 2282 S.private_counters()); 2283 (void)LoopScope.Privatize(); 2284 2285 // Detect the distribute schedule kind and chunk. 2286 llvm::Value *Chunk = nullptr; 2287 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; 2288 if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) { 2289 ScheduleKind = C->getDistScheduleKind(); 2290 if (const auto *Ch = C->getChunkSize()) { 2291 Chunk = EmitScalarExpr(Ch); 2292 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2293 S.getIterationVariable()->getType(), 2294 S.getLocStart()); 2295 } 2296 } 2297 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2298 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2299 2300 // OpenMP [2.10.8, distribute Construct, Description] 2301 // If dist_schedule is specified, kind must be static. If specified, 2302 // iterations are divided into chunks of size chunk_size, chunks are 2303 // assigned to the teams of the league in a round-robin fashion in the 2304 // order of the team number. When no chunk_size is specified, the 2305 // iteration space is divided into chunks that are approximately equal 2306 // in size, and at most one chunk is distributed to each team of the 2307 // league. The size of the chunks is unspecified in this case. 2308 if (RT.isStaticNonchunked(ScheduleKind, 2309 /* Chunked */ Chunk != nullptr)) { 2310 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, 2311 IVSize, IVSigned, /* Ordered = */ false, 2312 IL.getAddress(), LB.getAddress(), 2313 UB.getAddress(), ST.getAddress()); 2314 auto LoopExit = 2315 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2316 // UB = min(UB, GlobalUB); 2317 EmitIgnoredExpr(S.getEnsureUpperBound()); 2318 // IV = LB; 2319 EmitIgnoredExpr(S.getInit()); 2320 // while (idx <= UB) { BODY; ++idx; } 2321 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 2322 S.getInc(), 2323 [&S, LoopExit](CodeGenFunction &CGF) { 2324 CGF.EmitOMPLoopBody(S, LoopExit); 2325 CGF.EmitStopPoint(&S); 2326 }, 2327 [](CodeGenFunction &) {}); 2328 EmitBlock(LoopExit.getBlock()); 2329 // Tell the runtime we are done. 2330 RT.emitForStaticFinish(*this, S.getLocStart()); 2331 } else { 2332 // Emit the outer loop, which requests its work chunk [LB..UB] from 2333 // runtime and runs the inner loop to process it. 
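// E.g. with dist_schedule(static, 512) each team repeatedly claims a
// 512-iteration chunk, in round-robin order of team number, until the
// iteration space is exhausted.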
2334 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, 2335 LB.getAddress(), UB.getAddress(), ST.getAddress(), 2336 IL.getAddress(), Chunk); 2337 } 2338 } 2339 2340 // We're now done with the loop, so jump to the continuation block. 2341 if (ContBlock) { 2342 EmitBranch(ContBlock); 2343 EmitBlock(ContBlock, true); 2344 } 2345 } 2346 } 2347 2348 void CodeGenFunction::EmitOMPDistributeDirective( 2349 const OMPDistributeDirective &S) { 2350 LexicalScope Scope(*this, S.getSourceRange()); 2351 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2352 CGF.EmitOMPDistributeLoop(S); 2353 }; 2354 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 2355 false); 2356 } 2357 2358 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 2359 const CapturedStmt *S) { 2360 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 2361 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 2362 CGF.CapturedStmtInfo = &CapStmtInfo; 2363 auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); 2364 Fn->addFnAttr(llvm::Attribute::NoInline); 2365 return Fn; 2366 } 2367 2368 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 2369 if (!S.getAssociatedStmt()) 2370 return; 2371 OMPLexicalScope Scope(*this, S); 2372 auto *C = S.getSingleClause<OMPSIMDClause>(); 2373 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) { 2374 if (C) { 2375 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2376 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 2377 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 2378 auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); 2379 CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); 2380 } else { 2381 CGF.EmitStmt( 2382 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2383 } 2384 }; 2385 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); 2386 } 2387 2388 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 2389 QualType SrcType, QualType DestType, 2390 SourceLocation Loc) { 2391 assert(CGF.hasScalarEvaluationKind(DestType) && 2392 "DestType must have scalar evaluation kind."); 2393 assert(!Val.isAggregate() && "Must be a scalar or complex."); 2394 return Val.isScalar() 2395 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, 2396 Loc) 2397 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 2398 DestType, Loc); 2399 } 2400 2401 static CodeGenFunction::ComplexPairTy 2402 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 2403 QualType DestType, SourceLocation Loc) { 2404 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 2405 "DestType must have complex evaluation kind."); 2406 CodeGenFunction::ComplexPairTy ComplexVal; 2407 if (Val.isScalar()) { 2408 // Convert the input element to the element type of the complex. 
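// E.g. a 'double' source value destined for '_Complex float' becomes
// ((float)val, 0.0f): the converted scalar feeds the real part and the
// imaginary part is zero-initialized below.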
2409 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 2410 auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 2411 DestElementType, Loc); 2412 ComplexVal = CodeGenFunction::ComplexPairTy( 2413 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 2414 } else { 2415 assert(Val.isComplex() && "Must be a scalar or complex."); 2416 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 2417 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 2418 ComplexVal.first = CGF.EmitScalarConversion( 2419 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 2420 ComplexVal.second = CGF.EmitScalarConversion( 2421 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 2422 } 2423 return ComplexVal; 2424 } 2425 2426 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 2427 LValue LVal, RValue RVal) { 2428 if (LVal.isGlobalReg()) { 2429 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 2430 } else { 2431 CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? llvm::SequentiallyConsistent 2432 : llvm::Monotonic, 2433 LVal.isVolatile(), /*IsInit=*/false); 2434 } 2435 } 2436 2437 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, 2438 QualType RValTy, SourceLocation Loc) { 2439 switch (getEvaluationKind(LVal.getType())) { 2440 case TEK_Scalar: 2441 EmitStoreThroughLValue(RValue::get(convertToScalarValue( 2442 *this, RVal, RValTy, LVal.getType(), Loc)), 2443 LVal); 2444 break; 2445 case TEK_Complex: 2446 EmitStoreOfComplex( 2447 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, 2448 /*isInit=*/false); 2449 break; 2450 case TEK_Aggregate: 2451 llvm_unreachable("Must be a scalar or complex."); 2452 } 2453 } 2454 2455 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 2456 const Expr *X, const Expr *V, 2457 SourceLocation Loc) { 2458 // v = x; 2459 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 2460 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 2461 LValue XLValue = CGF.EmitLValue(X); 2462 LValue VLValue = CGF.EmitLValue(V); 2463 RValue Res = XLValue.isGlobalReg() 2464 ? CGF.EmitLoadOfLValue(XLValue, Loc) 2465 : CGF.EmitAtomicLoad(XLValue, Loc, 2466 IsSeqCst ? llvm::SequentiallyConsistent 2467 : llvm::Monotonic, 2468 XLValue.isVolatile()); 2469 // OpenMP, 2.12.6, atomic Construct 2470 // Any atomic construct with a seq_cst clause forces the atomically 2471 // performed operation to include an implicit flush operation without a 2472 // list. 2473 if (IsSeqCst) 2474 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2475 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); 2476 } 2477 2478 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 2479 const Expr *X, const Expr *E, 2480 SourceLocation Loc) { 2481 // x = expr; 2482 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 2483 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); 2484 // OpenMP, 2.12.6, atomic Construct 2485 // Any atomic construct with a seq_cst clause forces the atomically 2486 // performed operation to include an implicit flush operation without a 2487 // list. 
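// E.g. '#pragma omp atomic write seq_cst' therefore lowers to a
// sequentially consistent store followed by a __kmpc_flush(&loc) call
// with an empty variable list.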
2488 if (IsSeqCst) 2489 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2490 } 2491 2492 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, 2493 RValue Update, 2494 BinaryOperatorKind BO, 2495 llvm::AtomicOrdering AO, 2496 bool IsXLHSInRHSPart) { 2497 auto &Context = CGF.CGM.getContext(); 2498 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' 2499 // expression is simple and atomic is allowed for the given type for the 2500 // target platform. 2501 if (BO == BO_Comma || !Update.isScalar() || 2502 !Update.getScalarVal()->getType()->isIntegerTy() || 2503 !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) && 2504 (Update.getScalarVal()->getType() != 2505 X.getAddress().getElementType())) || 2506 !X.getAddress().getElementType()->isIntegerTy() || 2507 !Context.getTargetInfo().hasBuiltinAtomic( 2508 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) 2509 return std::make_pair(false, RValue::get(nullptr)); 2510 2511 llvm::AtomicRMWInst::BinOp RMWOp; 2512 switch (BO) { 2513 case BO_Add: 2514 RMWOp = llvm::AtomicRMWInst::Add; 2515 break; 2516 case BO_Sub: 2517 if (!IsXLHSInRHSPart) 2518 return std::make_pair(false, RValue::get(nullptr)); 2519 RMWOp = llvm::AtomicRMWInst::Sub; 2520 break; 2521 case BO_And: 2522 RMWOp = llvm::AtomicRMWInst::And; 2523 break; 2524 case BO_Or: 2525 RMWOp = llvm::AtomicRMWInst::Or; 2526 break; 2527 case BO_Xor: 2528 RMWOp = llvm::AtomicRMWInst::Xor; 2529 break; 2530 case BO_LT: 2531 RMWOp = X.getType()->hasSignedIntegerRepresentation() 2532 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min 2533 : llvm::AtomicRMWInst::Max) 2534 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin 2535 : llvm::AtomicRMWInst::UMax); 2536 break; 2537 case BO_GT: 2538 RMWOp = X.getType()->hasSignedIntegerRepresentation() 2539 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max 2540 : llvm::AtomicRMWInst::Min) 2541 : (IsXLHSInRHSPart ? 
llvm::AtomicRMWInst::UMax 2542 : llvm::AtomicRMWInst::UMin); 2543 break; 2544 case BO_Assign: 2545 RMWOp = llvm::AtomicRMWInst::Xchg; 2546 break; 2547 case BO_Mul: 2548 case BO_Div: 2549 case BO_Rem: 2550 case BO_Shl: 2551 case BO_Shr: 2552 case BO_LAnd: 2553 case BO_LOr: 2554 return std::make_pair(false, RValue::get(nullptr)); 2555 case BO_PtrMemD: 2556 case BO_PtrMemI: 2557 case BO_LE: 2558 case BO_GE: 2559 case BO_EQ: 2560 case BO_NE: 2561 case BO_AddAssign: 2562 case BO_SubAssign: 2563 case BO_AndAssign: 2564 case BO_OrAssign: 2565 case BO_XorAssign: 2566 case BO_MulAssign: 2567 case BO_DivAssign: 2568 case BO_RemAssign: 2569 case BO_ShlAssign: 2570 case BO_ShrAssign: 2571 case BO_Comma: 2572 llvm_unreachable("Unsupported atomic update operation"); 2573 } 2574 auto *UpdateVal = Update.getScalarVal(); 2575 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) { 2576 UpdateVal = CGF.Builder.CreateIntCast( 2577 IC, X.getAddress().getElementType(), 2578 X.getType()->hasSignedIntegerRepresentation()); 2579 } 2580 auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO); 2581 return std::make_pair(true, RValue::get(Res)); 2582 } 2583 2584 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( 2585 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, 2586 llvm::AtomicOrdering AO, SourceLocation Loc, 2587 const llvm::function_ref<RValue(RValue)> &CommonGen) { 2588 // Update expressions are allowed to have the following forms: 2589 // x binop= expr; -> xrval + expr; 2590 // x++, ++x -> xrval + 1; 2591 // x--, --x -> xrval - 1; 2592 // x = x binop expr; -> xrval binop expr 2593 // x = expr Op x; - > expr binop xrval; 2594 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart); 2595 if (!Res.first) { 2596 if (X.isGlobalReg()) { 2597 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop 2598 // 'xrval'. 2599 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X); 2600 } else { 2601 // Perform compare-and-swap procedure. 2602 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified()); 2603 } 2604 } 2605 return Res; 2606 } 2607 2608 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, 2609 const Expr *X, const Expr *E, 2610 const Expr *UE, bool IsXLHSInRHSPart, 2611 SourceLocation Loc) { 2612 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 2613 "Update expr in 'atomic update' must be a binary operator."); 2614 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 2615 // Update expressions are allowed to have the following forms: 2616 // x binop= expr; -> xrval + expr; 2617 // x++, ++x -> xrval + 1; 2618 // x--, --x -> xrval - 1; 2619 // x = x binop expr; -> xrval binop expr 2620 // x = expr Op x; - > expr binop xrval; 2621 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); 2622 LValue XLValue = CGF.EmitLValue(X); 2623 RValue ExprRValue = CGF.EmitAnyExpr(E); 2624 auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic; 2625 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 2626 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 2627 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 2628 auto *ERValExpr = IsXLHSInRHSPart ? 
RHS : LHS; 2629 auto Gen = 2630 [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue { 2631 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 2632 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 2633 return CGF.EmitAnyExpr(UE); 2634 }; 2635 (void)CGF.EmitOMPAtomicSimpleUpdateExpr( 2636 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 2637 // OpenMP, 2.12.6, atomic Construct 2638 // Any atomic construct with a seq_cst clause forces the atomically 2639 // performed operation to include an implicit flush operation without a 2640 // list. 2641 if (IsSeqCst) 2642 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2643 } 2644 2645 static RValue convertToType(CodeGenFunction &CGF, RValue Value, 2646 QualType SourceType, QualType ResType, 2647 SourceLocation Loc) { 2648 switch (CGF.getEvaluationKind(ResType)) { 2649 case TEK_Scalar: 2650 return RValue::get( 2651 convertToScalarValue(CGF, Value, SourceType, ResType, Loc)); 2652 case TEK_Complex: { 2653 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc); 2654 return RValue::getComplex(Res.first, Res.second); 2655 } 2656 case TEK_Aggregate: 2657 break; 2658 } 2659 llvm_unreachable("Must be a scalar or complex."); 2660 } 2661 2662 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, 2663 bool IsPostfixUpdate, const Expr *V, 2664 const Expr *X, const Expr *E, 2665 const Expr *UE, bool IsXLHSInRHSPart, 2666 SourceLocation Loc) { 2667 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue"); 2668 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue"); 2669 RValue NewVVal; 2670 LValue VLValue = CGF.EmitLValue(V); 2671 LValue XLValue = CGF.EmitLValue(X); 2672 RValue ExprRValue = CGF.EmitAnyExpr(E); 2673 auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic; 2674 QualType NewVValType; 2675 if (UE) { 2676 // 'x' is updated with some additional value. 2677 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 2678 "Update expr in 'atomic capture' must be a binary operator."); 2679 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 2680 // Update expressions are allowed to have the following forms: 2681 // x binop= expr; -> xrval + expr; 2682 // x++, ++x -> xrval + 1; 2683 // x--, --x -> xrval - 1; 2684 // x = x binop expr; -> xrval binop expr 2685 // x = expr Op x; - > expr binop xrval; 2686 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 2687 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 2688 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 2689 NewVValType = XRValExpr->getType(); 2690 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 2691 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, 2692 IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue { 2693 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 2694 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 2695 RValue Res = CGF.EmitAnyExpr(UE); 2696 NewVVal = IsPostfixUpdate ? XRValue : Res; 2697 return Res; 2698 }; 2699 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 2700 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 2701 if (Res.first) { 2702 // 'atomicrmw' instruction was generated. 2703 if (IsPostfixUpdate) { 2704 // Use old value from 'atomicrmw'. 2705 NewVVal = Res.second; 2706 } else { 2707 // 'atomicrmw' does not provide new value, so evaluate it using old 2708 // value of 'x'. 
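// E.g. for 'v = (x += 1);' lowered to 'atomicrmw add', the instruction
// returns the old 'x', so the captured value is recomputed here as
// 'old + 1' by re-evaluating the update expression on that old value.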
2709 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 2710 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 2711 NewVVal = CGF.EmitAnyExpr(UE); 2712 } 2713 } 2714 } else { 2715 // 'x' is simply rewritten with some 'expr'. 2716 NewVValType = X->getType().getNonReferenceType(); 2717 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 2718 X->getType().getNonReferenceType(), Loc); 2719 auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue { 2720 NewVVal = XRValue; 2721 return ExprRValue; 2722 }; 2723 // Try to perform atomicrmw xchg, otherwise simple exchange. 2724 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 2725 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 2726 Loc, Gen); 2727 if (Res.first) { 2728 // 'atomicrmw' instruction was generated. 2729 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 2730 } 2731 } 2732 // Emit post-update store to 'v' of old/new 'x' value. 2733 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); 2734 // OpenMP, 2.12.6, atomic Construct 2735 // Any atomic construct with a seq_cst clause forces the atomically 2736 // performed operation to include an implicit flush operation without a 2737 // list. 2738 if (IsSeqCst) 2739 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2740 } 2741 2742 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 2743 bool IsSeqCst, bool IsPostfixUpdate, 2744 const Expr *X, const Expr *V, const Expr *E, 2745 const Expr *UE, bool IsXLHSInRHSPart, 2746 SourceLocation Loc) { 2747 switch (Kind) { 2748 case OMPC_read: 2749 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 2750 break; 2751 case OMPC_write: 2752 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 2753 break; 2754 case OMPC_unknown: 2755 case OMPC_update: 2756 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 2757 break; 2758 case OMPC_capture: 2759 EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 2760 IsXLHSInRHSPart, Loc); 2761 break; 2762 case OMPC_if: 2763 case OMPC_final: 2764 case OMPC_num_threads: 2765 case OMPC_private: 2766 case OMPC_firstprivate: 2767 case OMPC_lastprivate: 2768 case OMPC_reduction: 2769 case OMPC_safelen: 2770 case OMPC_simdlen: 2771 case OMPC_collapse: 2772 case OMPC_default: 2773 case OMPC_seq_cst: 2774 case OMPC_shared: 2775 case OMPC_linear: 2776 case OMPC_aligned: 2777 case OMPC_copyin: 2778 case OMPC_copyprivate: 2779 case OMPC_flush: 2780 case OMPC_proc_bind: 2781 case OMPC_schedule: 2782 case OMPC_ordered: 2783 case OMPC_nowait: 2784 case OMPC_untied: 2785 case OMPC_threadprivate: 2786 case OMPC_depend: 2787 case OMPC_mergeable: 2788 case OMPC_device: 2789 case OMPC_threads: 2790 case OMPC_simd: 2791 case OMPC_map: 2792 case OMPC_num_teams: 2793 case OMPC_thread_limit: 2794 case OMPC_priority: 2795 case OMPC_grainsize: 2796 case OMPC_nogroup: 2797 case OMPC_num_tasks: 2798 case OMPC_hint: 2799 case OMPC_dist_schedule: 2800 case OMPC_defaultmap: 2801 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 2802 } 2803 } 2804 2805 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 2806 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); 2807 OpenMPClauseKind Kind = OMPC_unknown; 2808 for (auto *C : S.clauses()) { 2809 // Find first clause (skip seq_cst clause, if it is first). 
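// E.g. '#pragma omp atomic seq_cst capture' selects OMPC_capture here,
// while a bare '#pragma omp atomic' leaves Kind as OMPC_unknown, which
// EmitOMPAtomicExpr treats as an update.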
2810 if (C->getClauseKind() != OMPC_seq_cst) { 2811 Kind = C->getClauseKind(); 2812 break; 2813 } 2814 } 2815 2816 const auto *CS = 2817 S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 2818 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) { 2819 enterFullExpression(EWC); 2820 } 2821 // Processing for statements under 'atomic capture'. 2822 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 2823 for (const auto *C : Compound->body()) { 2824 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) { 2825 enterFullExpression(EWC); 2826 } 2827 } 2828 } 2829 2830 OMPLexicalScope Scope(*this, S); 2831 auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF) { 2832 CGF.EmitStopPoint(CS); 2833 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), 2834 S.getV(), S.getExpr(), S.getUpdateExpr(), 2835 S.isXLHSInRHSPart(), S.getLocStart()); 2836 }; 2837 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); 2838 } 2839 2840 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 2841 OMPLexicalScope Scope(*this, S); 2842 const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); 2843 2844 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 2845 GenerateOpenMPCapturedVars(CS, CapturedVars); 2846 2847 llvm::Function *Fn = nullptr; 2848 llvm::Constant *FnID = nullptr; 2849 2850 // Check if we have any if clause associated with the directive. 2851 const Expr *IfCond = nullptr; 2852 2853 if (auto *C = S.getSingleClause<OMPIfClause>()) { 2854 IfCond = C->getCondition(); 2855 } 2856 2857 // Check if we have any device clause associated with the directive. 2858 const Expr *Device = nullptr; 2859 if (auto *C = S.getSingleClause<OMPDeviceClause>()) { 2860 Device = C->getDevice(); 2861 } 2862 2863 // Check if we have an if clause whose conditional always evaluates to false 2864 // or if we do not have any targets specified. If so the target region is not 2865 // an offload entry point. 2866 bool IsOffloadEntry = true; 2867 if (IfCond) { 2868 bool Val; 2869 if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) 2870 IsOffloadEntry = false; 2871 } 2872 if (CGM.getLangOpts().OMPTargetTriples.empty()) 2873 IsOffloadEntry = false; 2874 2875 assert(CurFuncDecl && "No parent declaration for target region!"); 2876 StringRef ParentName; 2877 // In case we have Ctors/Dtors we use the complete type variant to produce 2878 // the mangling of the device outlined kernel. 2879 if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl)) 2880 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); 2881 else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl)) 2882 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); 2883 else 2884 ParentName = 2885 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl))); 2886 2887 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, 2888 IsOffloadEntry); 2889 2890 CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device, 2891 CapturedVars); 2892 } 2893 2894 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, 2895 const OMPExecutableDirective &S, 2896 OpenMPDirectiveKind InnermostKind, 2897 const RegionCodeGenTy &CodeGen) { 2898 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2899 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 2900 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 2901 auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). 
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  auto OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOrTeamsOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  // This helper is currently reached only for the plain 'teams' directive,
  // so the cast is safe.
  const OMPTeamsDirective &TD = cast<OMPTeamsDirective>(S);
  const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>();
  const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    llvm::Value *NumTeamsVal =
        NT ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NT->getNumTeams()),
                                       CGF.CGM.Int32Ty, /*isSigned=*/true)
           : CGF.Builder.getInt32(0);

    llvm::Value *ThreadLimitVal =
        TL ? CGF.Builder.CreateIntCast(
                 CGF.EmitScalarExpr(TL->getThreadLimit()), CGF.CGM.Int32Ty,
                 /*isSigned=*/true)
           : CGF.Builder.getInt32(0);

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeamsVal,
                                                  ThreadLimitVal,
                                                  S.getLocStart());
  }

  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
                                           CapturedVars);
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
}

void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for);
  return BreakContinueStack.back().BreakBlock;
}
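
// For example, for
//   #pragma omp target data map(to: a) map(from: b)
// only the associated region is emitted below for now; the map clauses are
// not yet lowered to runtime mapping calls at this point in the
// implementation.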
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  // Emit only the code inside the construct for now.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_data,
      [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
}

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // TODO: codegen for target enter data.
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // TODO: codegen for target exit data.
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  // TODO: codegen for target parallel.
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  // TODO: codegen for target parallel for.
}

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  // Emit only the code inside the construct for now.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_taskloop,
      [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  // Emit only the code inside the construct for now.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_taskloop_simd,
      [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
}