//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/DeclOpenMP.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
class OMPLexicalScope {
  CodeGenFunction::LexicalScope Scope;
  /// Emit the pre-init statements attached to the directive's clauses. These
  /// declare the helper variables that hold captured clause expressions,
  /// evaluated before the construct itself is emitted.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              // The capture is marked "no init": allocate storage and register
              // cleanups, but skip emitting the initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }

public:
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : Scope(CGF, S.getSourceRange()) {
    emitPreInitStmt(CGF, S);
  }
};
} // namespace

/// Compute the size of \p Ty in bytes as an llvm::Value, multiplying out VLA
/// dimensions at run time. Returns a constant 0 if the type still has zero
/// size after peeling VLA dimensions.
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      // Peel one VLA dimension per iteration, accumulating the element count.
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}

/// Collect the values to be passed to the outlined function for the captures
/// of \p S: VLA sizes are passed as the cached size values, 'this' as
/// CXXThisValue, by-copy captures as loaded scalars, and by-reference
/// captures as addresses.
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      // Pass the previously computed VLA size value directly.
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy())
      CapturedVars.push_back(
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal());
    else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

/// Reinterpret the uintptr-typed value held in \p AddrLV as a pointer to
/// \p DstType and return its address. If \p isReferenceType is set, a
/// temporary holding that pointer is created so callers get the address of
/// the reference rather than the referenced value.
static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), SourceLocation());
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references we need to return the address of the
  // reference instead of the reference of the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitScalarInit(RefVal, TmpLVal);
  }

  return TmpAddr;
}

/// Outline the body of the captured statement \p S into a separate function
/// whose explicit parameters mirror the capture record's fields.
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  const RecordDecl *RD = S.getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  // Build the argument list.
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList Args;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType())
      ArgType = Ctx.getUIntPtrType();

    // Pick a name for the implicit parameter: the captured variable's own
    // identifier, "this", or "vla" for VLA size captures.
    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &getContext().Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &getContext().Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getContext().getVariableArrayDecayedType(ArgType);
    Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
                                             FD->getLocation(), II, ArgType));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F = llvm::Function::Create(
      FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
      CapturedStmtInfo->getHelperName(), &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // Generate the function.
  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt]));
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal =
        MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
                       AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      // VLA sizes travel as uintptr arguments; cast back and record the size
      // value so later getTypeSize()/VLA lowering can find it.
      LValue CastedArgLVal =
          MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
                                              Args[Cnt]->getName(), ArgLVal),
                         FD->getType(), AlignmentSource::Decl);
      auto *ExprArg =
          EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizeMap[VAT->getSizeExpr()] = ExprArg;
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        // By-reference captures arrive as references; load through the
        // reference to reach the variable itself (unless the variable already
        // has reference type).
        ArgAddr = EmitLoadOfReference(
            ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
      }
      setAddrOfLocalVar(
          Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      setAddrOfLocalVar(I->getCapturedVar(),
                        castValueFromUintptr(*this, FD->getType(),
                                             Args[Cnt]->getName(), ArgLVal,
                                             VarTy->isReferenceType()));
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue =
          EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
    }
    ++Cnt;
    ++I;
  }

  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());

  return F;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//

/// Emit an element-by-element copy loop over \p DestAddr/\p SrcAddr, invoking
/// \p CopyGen once per element pair inside the loop body.
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across loop iterations.
  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI =
      Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Wire up the back-edge incoming values after the branch, once the current
  // insertion block (the loop's last block) is known.
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// \brief Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, const Expr *Init) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());

  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI tracks the current destination element across loop iterations.
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope ensures temporaries created by the initializer are cleaned up
    // once per element.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                         /*IsInitializer=*/false);
  }

  // Shift the address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit a copy of \p OriginalType data from \p SrcAddr to \p DestAddr using
/// the clause-provided \p Copy expression, remapping the pseudo source and
/// destination variables \p SrcVD / \p DestVD onto the given addresses.
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

/// Emit private copies with copy-initialization for all 'firstprivate'
/// variables of \p D into \p PrivateScope. Returns true if at least one
/// emitted firstprivate variable is also lastprivate on the same directive.
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  // Collect lastprivate variables to detect firstprivate+lastprivate overlap.
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate ||
          (Lastprivates.count(OrigVD->getCanonicalDecl()) > 0);
      // Emit each variable only once even if it appears in several clauses.
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = OrigVD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              // Non-trivial element construction: copy element by element.
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    // Drop the temporary mapping so VDInit doesn't leak into
                    // later emission.
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable
            // (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

/// Emit uninitialized-copy private copies for all 'private' clause variables
/// of \p D into \p PrivateScope.
void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Emit each variable only once even if listed in several clauses.
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

/// Emit the 'copyin' clause: on non-master threads, copy the master's
/// threadprivate values into this thread's threadprivate copies. Returns true
/// if any copy was emitted (callers then emit the synchronizing barrier).
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // Master detection: the master's copy and this thread's copy are
          // the same object, so compare the two addresses.
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

/// Register private copies (and original-address accessors) for the
/// 'lastprivate' variables of \p D. Returns true if the directive has at
/// least one lastprivate clause.
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Map the pseudo destination variable to the original's address so
        // the final copy-out can find it.
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [&]() -> Address {
                // Emit private VarDecl with copy init.
                EmitDecl(*VD);
                return GetAddrOfLocalVar(VD);
              });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

/// Emit the copy-back of lastprivate values into the original variables,
/// guarded by \p IsLastIterCond when provided.
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  // For loop directives, remember each loop counter's "final" update
  // expression keyed by the counter's canonical decl.
  llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D = cast<DeclRefExpr>(*IC)->getDecl()->getCanonicalDecl();
      LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(UpExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Cast \p Addr so it can stand in for a variable of type \p BaseTy. For each
/// pointer/reference level between \p BaseTy and \p ElTy a memory temporary
/// is created and chained, so the returned address dereferences down to
/// \p Addr; with no indirection levels, \p Addr is simply bitcast.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLV.getPointer()->getType();
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLV.getAlignment());
}

/// Load through the pointer/reference levels of \p BaseTy starting at
/// \p BaseLV until the pointee matches \p ElTy, then return an lvalue for the
/// result bitcast to \p ElTy's in-memory pointer type.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (auto *PtrTy = BaseTy->getAs<PointerType>())
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    else {
      BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
                                             BaseTy->castAs<ReferenceType>());
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      Address(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
          BaseLV.getAlignment()),
      BaseLV.getType(), BaseLV.getAlignmentSource());
}

/// Emit private copies with reduction-initializers for the 'reduction' clause
/// variables of \p D, handling array sections, array subscripts, arrays and
/// plain variables, and map the clause's implicit LHS/RHS variables onto the
/// original and private storage respectively.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    auto IPriv = C->privates().begin();
    for (auto IRef : C->varlists()) {
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
        // Reduction over an array section: find the underlying DeclRefExpr by
        // stripping nested sections/subscripts.
        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
          Base = TempOASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
        auto OASELValueUB =
            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue =
            loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
                        OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
          return OASELValueLB.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
                     OASELValueUB, OriginalBaseLValue]() -> Address {
              // Emit VarDecl with copy init for arrays.
              // Get the address of the original variable captured in current
              // captured region.
              // Section length = UB - LB + 1 elements.
              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
                                                 OASELValueLB.getPointer());
              Size = Builder.CreateNUWAdd(
                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
              // Bind the private VLA's size expression to the computed length
              // before emitting the variably-modified type.
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(Size));
              EmitVariablyModifiedType(PrivateVD->getType());
              auto Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              auto *Init = PrivateVD->getInit();
              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init);
              EmitAutoVarCleanups(Emission);
              // Emit private VarDecl with reduction init.
              // Offset the private buffer so that indexing with the original
              // base yields the section's first element.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   OASELValueLB.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(),
                                OASELValueLB.getType(), OriginalBaseLValue,
                                Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
        // Reduction over a single array element: strip nested subscripts to
        // find the underlying DeclRefExpr.
        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto ASELValue = EmitLValue(ASE);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue = loadToBegin(
            *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
          return ASELValue.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
                     OriginalBaseLValue]() -> Address {
              // Emit private VarDecl with reduction init.
              EmitDecl(*PrivateVD);
              auto Addr = GetAddrOfLocalVar(PrivateVD);
              // Offset the private copy so indexing with the original base
              // yields the reduced element.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   ASELValue.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
                                OriginalBaseLValue, Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
846 (void)IsRegistered; 847 PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { 848 return Builder.CreateElementBitCast( 849 GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()), 850 "rhs.begin"); 851 }); 852 } else { 853 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); 854 QualType Type = PrivateVD->getType(); 855 if (getContext().getAsArrayType(Type)) { 856 // Store the address of the original variable associated with the LHS 857 // implicit variable. 858 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 859 CapturedStmtInfo->lookup(OrigVD) != nullptr, 860 IRef->getType(), VK_LValue, IRef->getExprLoc()); 861 Address OriginalAddr = EmitLValue(&DRE).getAddress(); 862 PrivateScope.addPrivate(LHSVD, [this, OriginalAddr, 863 LHSVD]() -> Address { 864 return Builder.CreateElementBitCast( 865 OriginalAddr, ConvertTypeForMem(LHSVD->getType()), 866 "lhs.begin"); 867 }); 868 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 869 if (Type->isVariablyModifiedType()) { 870 CodeGenFunction::OpaqueValueMapping OpaqueMap( 871 *this, cast<OpaqueValueExpr>( 872 getContext() 873 .getAsVariableArrayType(PrivateVD->getType()) 874 ->getSizeExpr()), 875 RValue::get( 876 getTypeSize(OrigVD->getType().getNonReferenceType()))); 877 EmitVariablyModifiedType(Type); 878 } 879 auto Emission = EmitAutoVarAlloca(*PrivateVD); 880 auto Addr = Emission.getAllocatedAddress(); 881 auto *Init = PrivateVD->getInit(); 882 EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init); 883 EmitAutoVarCleanups(Emission); 884 return Emission.getAllocatedAddress(); 885 }); 886 assert(IsRegistered && "private var already registered as private"); 887 // Silence the warning about unused variable. 
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
            // RHS operates on the element type of the privatized array.
            return Builder.CreateElementBitCast(
                GetAddrOfLocalVar(PrivateVD),
                ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
          });
        } else {
          // Scalar reduction variable.
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> Address {
            DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                            CapturedStmtInfo->lookup(OrigVD) != nullptr,
                            IRef->getType(), VK_LValue, IRef->getExprLoc());
            return EmitLValue(&DRE).getAddress();
          });
          // Emit reduction copy.
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> Address {
                // Emit private VarDecl with reduction init.
                EmitDecl(*PrivateVD);
                return GetAddrOfLocalVar(PrivateVD);
              });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
            return GetAddrOfLocalVar(PrivateVD);
          });
        }
      }
      // Advance the parallel iterators for lhs/rhs/private expressions in
      // lockstep with the varlist.
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  }
}

/// Emit the runtime call that combines the threads' private reduction copies
/// into the original reduction variables, for all reduction clauses of \a D.
void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return;
  // Gather the components of every reduction clause so a single runtime call
  // can process all of them at once.
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        D.getSingleClause<OMPNowaitClause>() ||
            isOpenMPParallelDirective(D.getDirectiveKind()) ||
            D.getDirectiveKind() == OMPD_simd,
        D.getDirectiveKind() == OMPD_simd);
  }
}

/// Emit the post-update expressions of the reduction clauses of \a D, if any.
/// If \a CondGen returns a non-null condition, the post-updates are emitted
/// under a conditional block guarded by that condition.
static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (auto *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Common codegen for 'parallel'-based directives: outline the captured
/// statement via \a CodeGen, emit num_threads/proc_bind clauses, select the
/// applicable 'if' clause condition, and emit the runtime parallel call.
static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                           const OMPExecutableDirective &S,
                                           OpenMPDirectiveKind InnermostKind,
                                           const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
      emitParallelOrTeamsOutlinedFunction(S,
          *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  // Find the 'if' clause that applies to 'parallel' (either unmodified or with
  // the 'parallel' name modifier).
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

/// Emit code for the '#pragma omp parallel' directive.
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  OMPLexicalScope Scope(*this, S);
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // on propagation of the master thread's values of threadprivate
      // variables to the local instances of those variables in all other
      // implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

/// Emit the body of an OpenMP loop directive: counter updates, linear-variable
/// updates, and the loop body itself, with 'continue' routed past the body to
/// the updates/cleanups.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto U : C->updates()) {
      EmitIgnoredExpr(U);
    }
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

/// Emit a generic inner loop: condition check, body (via \a BodyGen),
/// increment \a IncExpr plus \a PostIncGen, and a back-edge to the condition.
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

/// Emit initializers for the private copies of linear-clause variables of
/// \a D, and pre-calculate non-constant linear steps.
void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return;
  // Emit inits for the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto Init : C->inits()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        // The initializer refers to the original variable: re-emit the
        // reference through the captured-statement lookup so the correct
        // address is used inside the outlined region.
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
}

/// Emit the final values of linear-clause variables back into the original
/// variables, optionally guarded by a condition produced by \a CondGen.
static void emitLinearClauseFinal(
    CodeGenFunction &CGF, const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.linear.pu");
          DoneBB = CGF.createBasicBlock(".omp.linear.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      // Privatize the original variable with its real address, then emit the
      // 'final' expression so the result lands in the original storage.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = CGF.EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(CGF);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      CGF.EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      CGF.EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit llvm.assume alignment assumptions for the pointers listed in the
/// 'aligned' clauses of \a D. With no explicit alignment, the target's
/// default SIMD alignment for the pointee type is used.
static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined default
        // alignments for SIMD instructions on the target platforms are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

/// Allocate the private copies of the loop counters of a loop directive and
/// register both the private and the original counter declarations in
/// \a LoopScope.
static void emitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters,
                                    ArrayRef<Expr *> PrivateCounters) {
  if (!CGF.HaveInsertPoint())
    return;
  // Counters and PrivateCounters are parallel lists; walk them in lockstep.
  auto I = PrivateCounters.begin();
  for (auto *E : Counters) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    Address Addr = Address::invalid();
    (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
      // Emit var without initialization.
      auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD);
      CGF.EmitAutoVarCleanups(VarEmission);
      Addr = VarEmission.getAllocatedAddress();
      return Addr;
    });
    // The original counter maps to the same private storage.
    (void)LoopScope.addPrivate(VD, [&]() -> Address { return Addr; });
    ++I;
  }
}

/// Emit the pre-condition check of a loop directive: privatize the loop
/// counters, emit their initial values, then branch on \a Cond.
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    emitPrivateLoopCounters(CGF, PreCondScope, S.counters(),
                            S.private_counters());
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (auto I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Check that loop is executed at least one time.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

/// Emit private copies (with copy-initialization) of the variables listed in
/// the 'linear' clauses of \a D and register them in \a PrivateScope.
static void
emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
                      CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
        // Emit private VarDecl with copy init.
        CGF.EmitVarDecl(*PrivateVD);
        return CGF.GetAddrOfLocalVar(PrivateVD);
      });
      assert(IsRegistered && "linear var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++CurPrivate;
    }
  }
}

/// Apply the 'simdlen'/'safelen' clauses of \a D to the loop metadata:
/// set the vectorization width and, for 'safelen', disallow marking memory
/// instructions as parallel.
static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(false);
  }
}

/// Initialize loop metadata for a simd region of \a D: enable vectorization
/// and process the simdlen/safelen clauses.
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}

/// Emit the final values of the loop counters of \a D into the original
/// counter variables, optionally inside a conditional block produced by
/// \a CondGen.
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    // Only emit the final value if the original counter is actually visible
    // here (a local declaration or a captured variable).
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      // Privatize the original counter with its real address and emit the
      // 'final' expression so the value is stored back to it.
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit code for the '#pragma omp simd' directive.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    CGF.EmitOMPLinearClauseInit(S);
    bool HasLastprivateClause;
    {
      OMPPrivateScope LoopScope(CGF);
      emitPrivateLoopCounters(CGF, LoopScope, S.counters(),
                              S.private_counters());
      emitPrivateLinearVars(CGF, S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      // Emit final copy of the lastprivate variables at the end of loops.
      if (HasLastprivateClause) {
        CGF.EmitOMPLastprivateClauseFinal(S);
      }
      CGF.EmitOMPReductionClauseFinal(S);
      emitPostUpdateForReductionClause(
          CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    }
    CGF.EmitOMPSimdFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    emitLinearClauseFinal(
        CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

/// Emit the outer dispatch loop for a worksharing/distribute loop that cannot
/// be lowered as a single static-nonchunked chunk: repeatedly obtain chunk
/// bounds (from the runtime for dynamic/ordered schedules, or by striding for
/// static chunked ones) and run the inner loop over each chunk.
void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond());
  } else {
    // Ask the runtime for the next chunk; it returns false when no chunks
    // remain.
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
                                 IL, LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();
  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                   [&S, LoopExit](CodeGenFunction &CGF) {
                     CGF.EmitOMPLoopBody(S, LoopExit);
                     CGF.EmitStopPoint(&S);
                   },
                   [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
                     if (Ordered) {
                       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
                           CGF, Loc, IVSize, IVSigned);
                     }
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  if (!DynamicOrOrdered)
    RT.emitForStaticFinish(*this, S.getLocEnd());

}

/// Emit the outer loop for a worksharing 'for' with a dynamic, guided, auto,
/// runtime, ordered, or static chunked schedule: initialize the dispatch via
/// the runtime, then delegate to EmitOMPOuterLoop.
void CodeGenFunction::EmitOMPForOuterLoop(
    OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind);

  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1.
  // For a chunk_size with value k (greater than 1), the size of each chunk is
  // determined in the same way, with the restriction that the chunks do not
  // contain fewer than k iterations (except for the last chunk to be assigned,
  // which may have fewer than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    // Dynamic/ordered schedules: let the runtime hand out chunks.
    llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
    RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind,
                           IVSize, IVSigned, Ordered, UBVal, Chunk);
  } else {
    // Static chunked schedule: initialize LB/UB/ST/IL once up front.
    RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                         Ordered, IL, LB, UB, ST, Chunk);
  }

  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB,
                   ST, IL, Chunk);
}

/// Emit the outer loop for a '#pragma omp distribute' directive. Same
/// behavior as EmitOMPForOuterLoop, except that the schedule cannot be
/// dynamic, so the runtime init is always the static 'distribute' form.
void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind,
    const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {

  auto &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
  // dynamic.
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                              IVSize, IVSigned, /* Ordered = */ false,
                              IL, LB, UB, ST, Chunk);

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false,
                   S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk);
}

/// \brief Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

namespace {
/// Schedule clause kind together with its two optional modifiers
/// (e.g. monotonic/nonmonotonic/simd).
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace

/// Emit a worksharing loop ('for'/'for simd'); returns whether the directive
/// has a lastprivate clause (callers use this to decide on final copies).
bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
1670 bool CondConstant; 1671 llvm::BasicBlock *ContBlock = nullptr; 1672 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1673 if (!CondConstant) 1674 return false; 1675 } else { 1676 auto *ThenBlock = createBasicBlock("omp.precond.then"); 1677 ContBlock = createBasicBlock("omp.precond.end"); 1678 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 1679 getProfileCount(&S)); 1680 EmitBlock(ThenBlock); 1681 incrementProfileCounter(&S); 1682 } 1683 1684 emitAlignedClause(*this, S); 1685 EmitOMPLinearClauseInit(S); 1686 // Emit helper vars inits. 1687 LValue LB = 1688 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); 1689 LValue UB = 1690 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); 1691 LValue ST = 1692 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 1693 LValue IL = 1694 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 1695 1696 // Emit 'then' code. 1697 { 1698 OMPPrivateScope LoopScope(*this); 1699 if (EmitOMPFirstprivateClause(S, LoopScope)) { 1700 // Emit implicit barrier to synchronize threads and avoid data races on 1701 // initialization of firstprivate variables and post-update of 1702 // lastprivate variables. 1703 CGM.getOpenMPRuntime().emitBarrierCall( 1704 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 1705 /*ForceSimpleCall=*/true); 1706 } 1707 EmitOMPPrivateClause(S, LoopScope); 1708 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 1709 EmitOMPReductionClauseInit(S, LoopScope); 1710 emitPrivateLoopCounters(*this, LoopScope, S.counters(), 1711 S.private_counters()); 1712 emitPrivateLinearVars(*this, S, LoopScope); 1713 (void)LoopScope.Privatize(); 1714 1715 // Detect the loop schedule kind and chunk. 
1716 llvm::Value *Chunk = nullptr; 1717 OpenMPScheduleClauseKind ScheduleKind = OMPC_SCHEDULE_unknown; 1718 OpenMPScheduleClauseModifier M1 = OMPC_SCHEDULE_MODIFIER_unknown; 1719 OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown; 1720 if (auto *C = S.getSingleClause<OMPScheduleClause>()) { 1721 ScheduleKind = C->getScheduleKind(); 1722 M1 = C->getFirstScheduleModifier(); 1723 M2 = C->getSecondScheduleModifier(); 1724 if (const auto *Ch = C->getChunkSize()) { 1725 Chunk = EmitScalarExpr(Ch); 1726 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 1727 S.getIterationVariable()->getType(), 1728 S.getLocStart()); 1729 } 1730 } 1731 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1732 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1733 const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr; 1734 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 1735 // If the static schedule kind is specified or if the ordered clause is 1736 // specified, and if no monotonic modifier is specified, the effect will 1737 // be as if the monotonic modifier was specified. 1738 if (RT.isStaticNonchunked(ScheduleKind, 1739 /* Chunked */ Chunk != nullptr) && 1740 !Ordered) { 1741 if (isOpenMPSimdDirective(S.getDirectiveKind())) 1742 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 1743 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1744 // When no chunk_size is specified, the iteration space is divided into 1745 // chunks that are approximately equal in size, and at most one chunk is 1746 // distributed to each thread. Note that the size of the chunks is 1747 // unspecified in this case. 
1748 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, 1749 IVSize, IVSigned, Ordered, 1750 IL.getAddress(), LB.getAddress(), 1751 UB.getAddress(), ST.getAddress()); 1752 auto LoopExit = 1753 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 1754 // UB = min(UB, GlobalUB); 1755 EmitIgnoredExpr(S.getEnsureUpperBound()); 1756 // IV = LB; 1757 EmitIgnoredExpr(S.getInit()); 1758 // while (idx <= UB) { BODY; ++idx; } 1759 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1760 S.getInc(), 1761 [&S, LoopExit](CodeGenFunction &CGF) { 1762 CGF.EmitOMPLoopBody(S, LoopExit); 1763 CGF.EmitStopPoint(&S); 1764 }, 1765 [](CodeGenFunction &) {}); 1766 EmitBlock(LoopExit.getBlock()); 1767 // Tell the runtime we are done. 1768 RT.emitForStaticFinish(*this, S.getLocStart()); 1769 } else { 1770 const bool IsMonotonic = Ordered || 1771 ScheduleKind == OMPC_SCHEDULE_static || 1772 ScheduleKind == OMPC_SCHEDULE_unknown || 1773 M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 1774 M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 1775 // Emit the outer loop, which requests its work chunk [LB..UB] from 1776 // runtime and runs the inner loop to process it. 1777 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 1778 LB.getAddress(), UB.getAddress(), ST.getAddress(), 1779 IL.getAddress(), Chunk); 1780 } 1781 EmitOMPReductionClauseFinal(S); 1782 // Emit post-update of the reduction variables if IsLastIter != 0. 1783 emitPostUpdateForReductionClause( 1784 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 1785 return CGF.Builder.CreateIsNotNull( 1786 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 1787 }); 1788 // Emit final copy of the lastprivate variables if IsLastIter != 0. 
1789 if (HasLastprivateClause) 1790 EmitOMPLastprivateClauseFinal( 1791 S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 1792 } 1793 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 1794 EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { 1795 return CGF.Builder.CreateIsNotNull( 1796 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 1797 }); 1798 } 1799 emitLinearClauseFinal(*this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 1800 return CGF.Builder.CreateIsNotNull( 1801 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 1802 }); 1803 // We're now done with the loop, so jump to the continuation block. 1804 if (ContBlock) { 1805 EmitBranch(ContBlock); 1806 EmitBlock(ContBlock, true); 1807 } 1808 } 1809 return HasLastprivateClause; 1810 } 1811 1812 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 1813 bool HasLastprivates = false; 1814 { 1815 OMPLexicalScope Scope(*this, S); 1816 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { 1817 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 1818 }; 1819 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 1820 S.hasCancel()); 1821 } 1822 1823 // Emit an implicit barrier at the end. 1824 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 1825 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 1826 } 1827 } 1828 1829 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 1830 bool HasLastprivates = false; 1831 { 1832 OMPLexicalScope Scope(*this, S); 1833 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { 1834 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 1835 }; 1836 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1837 } 1838 1839 // Emit an implicit barrier at the end. 
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

/// Create a 32-bit temporary with the given name and, if \p Init is
/// provided, emit a scalar store of \p Init into it. Used for the helper
/// variables (lb/ub/st/il/iv) of the sections loop below.
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitScalarInit(Init, LVal);
  return LVal;
}

/// Emit a 'sections' region as a statically scheduled loop over the section
/// statements, dispatching to the right section with a switch on the loop
/// counter. Shared by 'sections' and 'parallel sections'.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  // A CompoundStmt holds one section per child; any other statement is
  // treated as a single implicit section.
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Global upper bound is the index of the last section.
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(),
                        /*fpContractable=*/false);
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single implicit section: one case handling the whole statement.
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
        /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
        UB.getAddress(), ST.getAddress());
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart());
    CGF.EmitOMPReductionClauseFinal(S);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, CGF.Builder.CreateIsNotNull(
                 CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  {
    OMPLexicalScope Scope(*this, S);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_sections);
  }
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  OMPLexicalScope Scope(*this, S);
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
                                              S.hasCancel());
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination>=<source> expressions)
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  {
    OMPLexicalScope Scope(*this, S);
    // Emit code for 'single' region along with 'copyprivate' clauses
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPPrivateScope SingleScope(CGF);
      (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
      CGF.EmitOMPPrivateClause(S, SingleScope);
      (void)SingleScope.Privatize();
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    };
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid data race on firstprivate
  // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getLocStart(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  OMPLexicalScope Scope(*this, S);
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  OMPLexicalScope Scope(*this, S);
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  // An optional 'hint' clause value is forwarded to the runtime lock.
  Expr *Hint = nullptr;
  if (auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getLocStart(), Hint);
}

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  OMPLexicalScope Scope(*this, S);
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitOMPWorksharingLoop(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  OMPLexicalScope Scope(*this, S);
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitOMPWorksharingLoop(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  OMPLexicalScope Scope(*this, S);
  auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitSections(S); };
  emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  OMPLexicalScope Scope(*this, S);
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  llvm::SmallVector<const Expr *, 8> PrivateVars;
  llvm::SmallVector<const Expr *, 8> PrivateCopies;
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // EmittedAsPrivate de-duplicates variables that appear in more than one
      // clause.
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        PrivateVars.push_back(*IRef);
        PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  llvm::SmallVector<const Expr *, 8> FirstprivateVars;
  llvm::SmallVector<const Expr *, 8> FirstprivateCopies;
  llvm::SmallVector<const Expr *, 8> FirstprivateInits;
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        FirstprivateVars.push_back(*IRef);
        FirstprivateCopies.push_back(IInit);
        FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Build list of dependences.
  llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8>
      Dependences;
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    for (auto *IRef : C->varlists()) {
      Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
    }
  }
  auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars](
      CodeGenFunction &CGF) {
    // Set proper addresses for generated private copies.
    auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
    OMPPrivateScope Scope(CGF);
    if (!PrivateVars.empty() || !FirstprivateVars.empty()) {
      // Param 3 is the runtime-generated copy function, param 2 the privates
      // block it fills in.
      auto *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16>
          PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (auto *E : PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.EmitRuntimeCall(CopyFn, CallArgs);
      // Replace each original variable with the address the copy function
      // stored into the corresponding out-pointer.
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    (void)Scope.Privatize();
    if (*PartId) {
      // TODO: emit code for untied tasks.
    }
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, OMPD_task, CodeGen);
  // Check if we should emit tied or untied task.
  bool Tied = !S.getSingleClause<OMPUntiedClause>();
  // Check if the task is final
  llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Final.setInt(CondConstant);
    else
      Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Final.setInt(/*IntVal=*/false);
  }
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  // Only an 'if' clause with no modifier or the 'task' modifier applies here.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitTaskCall(
      *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy,
      CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars,
      FirstprivateCopies, FirstprivateInits, Dependences);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  OMPLexicalScope Scope(*this, S);
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  // The flush list is the 'flush' clause's variable list, if any; an empty
  // list requests a flush of all visible variables.
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Check pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      emitPrivateLoopCounters(*this, LoopScope, S.counters(),
                              S.private_counters());
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          // Chunk is evaluated once and converted to the iteration variable's
          // type.
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                                    IVSize, IVSigned, /* Ordered = */ false,
                                    IL.getAddress(), LB.getAddress(),
                                    UB.getAddress(), ST.getAddress());
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope,
                                   LB.getAddress(), UB.getAddress(),
                                   ST.getAddress(), IL.getAddress(), Chunk);
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}

void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitOMPDistributeLoop(S);
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
                                              false);
}

/// Outline the captured statement of an 'ordered simd' region into its own
/// function; marked NoInline so the call stays an opaque boundary.
static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
  Fn->addFnAttr(llvm::Attribute::NoInline);
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  // A stand-alone 'ordered' directive (depend form) has no associated
  // statement and emits nothing here.
  if (!S.getAssociatedStmt())
    return;
  OMPLexicalScope Scope(*this, S);
  auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) {
    if (C) {
      // 'ordered simd': call the outlined body with the captured variables.
      auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
      CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
    } else {
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    }
  };
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
}

/// Convert \p Val (scalar or complex) to a scalar of type \p DestType.
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
                                        Loc)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType, Loc);
}

/// Convert \p Val (scalar or complex) to a complex value of type \p DestType.
/// A scalar input becomes the real part with a zero imaginary part.
static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                              DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

/// Store \p RVal into \p LVal atomically (unless the lvalue is a global
/// register, which cannot be the target of an atomic instruction).
static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  } else {
    CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? llvm::SequentiallyConsistent
                                             : llvm::Monotonic,
                        LVal.isVolatile(), /*IsInit=*/false);
  }
}

/// Store \p RVal (of type \p RValTy) into \p LVal, converting to the
/// destination's scalar or complex representation as needed. Aggregates are
/// not supported here.
void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  // A global-register 'x' cannot be loaded atomically; use a plain load.
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(XLValue, Loc,
                                        IsSeqCst ? llvm::SequentiallyConsistent
                                                 : llvm::Monotonic,
                                        XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
}

static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
  // expression is simple and atomic is allowed for the given type for the
  // target platform.
2527 if (BO == BO_Comma || !Update.isScalar() || 2528 !Update.getScalarVal()->getType()->isIntegerTy() || 2529 !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) && 2530 (Update.getScalarVal()->getType() != 2531 X.getAddress().getElementType())) || 2532 !X.getAddress().getElementType()->isIntegerTy() || 2533 !Context.getTargetInfo().hasBuiltinAtomic( 2534 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) 2535 return std::make_pair(false, RValue::get(nullptr)); 2536 2537 llvm::AtomicRMWInst::BinOp RMWOp; 2538 switch (BO) { 2539 case BO_Add: 2540 RMWOp = llvm::AtomicRMWInst::Add; 2541 break; 2542 case BO_Sub: 2543 if (!IsXLHSInRHSPart) 2544 return std::make_pair(false, RValue::get(nullptr)); 2545 RMWOp = llvm::AtomicRMWInst::Sub; 2546 break; 2547 case BO_And: 2548 RMWOp = llvm::AtomicRMWInst::And; 2549 break; 2550 case BO_Or: 2551 RMWOp = llvm::AtomicRMWInst::Or; 2552 break; 2553 case BO_Xor: 2554 RMWOp = llvm::AtomicRMWInst::Xor; 2555 break; 2556 case BO_LT: 2557 RMWOp = X.getType()->hasSignedIntegerRepresentation() 2558 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min 2559 : llvm::AtomicRMWInst::Max) 2560 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin 2561 : llvm::AtomicRMWInst::UMax); 2562 break; 2563 case BO_GT: 2564 RMWOp = X.getType()->hasSignedIntegerRepresentation() 2565 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max 2566 : llvm::AtomicRMWInst::Min) 2567 : (IsXLHSInRHSPart ? 
llvm::AtomicRMWInst::UMax 2568 : llvm::AtomicRMWInst::UMin); 2569 break; 2570 case BO_Assign: 2571 RMWOp = llvm::AtomicRMWInst::Xchg; 2572 break; 2573 case BO_Mul: 2574 case BO_Div: 2575 case BO_Rem: 2576 case BO_Shl: 2577 case BO_Shr: 2578 case BO_LAnd: 2579 case BO_LOr: 2580 return std::make_pair(false, RValue::get(nullptr)); 2581 case BO_PtrMemD: 2582 case BO_PtrMemI: 2583 case BO_LE: 2584 case BO_GE: 2585 case BO_EQ: 2586 case BO_NE: 2587 case BO_AddAssign: 2588 case BO_SubAssign: 2589 case BO_AndAssign: 2590 case BO_OrAssign: 2591 case BO_XorAssign: 2592 case BO_MulAssign: 2593 case BO_DivAssign: 2594 case BO_RemAssign: 2595 case BO_ShlAssign: 2596 case BO_ShrAssign: 2597 case BO_Comma: 2598 llvm_unreachable("Unsupported atomic update operation"); 2599 } 2600 auto *UpdateVal = Update.getScalarVal(); 2601 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) { 2602 UpdateVal = CGF.Builder.CreateIntCast( 2603 IC, X.getAddress().getElementType(), 2604 X.getType()->hasSignedIntegerRepresentation()); 2605 } 2606 auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO); 2607 return std::make_pair(true, RValue::get(Res)); 2608 } 2609 2610 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( 2611 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, 2612 llvm::AtomicOrdering AO, SourceLocation Loc, 2613 const llvm::function_ref<RValue(RValue)> &CommonGen) { 2614 // Update expressions are allowed to have the following forms: 2615 // x binop= expr; -> xrval + expr; 2616 // x++, ++x -> xrval + 1; 2617 // x--, --x -> xrval - 1; 2618 // x = x binop expr; -> xrval binop expr 2619 // x = expr Op x; - > expr binop xrval; 2620 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart); 2621 if (!Res.first) { 2622 if (X.isGlobalReg()) { 2623 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop 2624 // 'xrval'. 
2625 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X); 2626 } else { 2627 // Perform compare-and-swap procedure. 2628 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified()); 2629 } 2630 } 2631 return Res; 2632 } 2633 2634 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, 2635 const Expr *X, const Expr *E, 2636 const Expr *UE, bool IsXLHSInRHSPart, 2637 SourceLocation Loc) { 2638 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 2639 "Update expr in 'atomic update' must be a binary operator."); 2640 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 2641 // Update expressions are allowed to have the following forms: 2642 // x binop= expr; -> xrval + expr; 2643 // x++, ++x -> xrval + 1; 2644 // x--, --x -> xrval - 1; 2645 // x = x binop expr; -> xrval binop expr 2646 // x = expr Op x; - > expr binop xrval; 2647 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); 2648 LValue XLValue = CGF.EmitLValue(X); 2649 RValue ExprRValue = CGF.EmitAnyExpr(E); 2650 auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic; 2651 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 2652 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 2653 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 2654 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 2655 auto Gen = 2656 [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue { 2657 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 2658 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 2659 return CGF.EmitAnyExpr(UE); 2660 }; 2661 (void)CGF.EmitOMPAtomicSimpleUpdateExpr( 2662 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 2663 // OpenMP, 2.12.6, atomic Construct 2664 // Any atomic construct with a seq_cst clause forces the atomically 2665 // performed operation to include an implicit flush operation without a 2666 // list. 
2667 if (IsSeqCst) 2668 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2669 } 2670 2671 static RValue convertToType(CodeGenFunction &CGF, RValue Value, 2672 QualType SourceType, QualType ResType, 2673 SourceLocation Loc) { 2674 switch (CGF.getEvaluationKind(ResType)) { 2675 case TEK_Scalar: 2676 return RValue::get( 2677 convertToScalarValue(CGF, Value, SourceType, ResType, Loc)); 2678 case TEK_Complex: { 2679 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc); 2680 return RValue::getComplex(Res.first, Res.second); 2681 } 2682 case TEK_Aggregate: 2683 break; 2684 } 2685 llvm_unreachable("Must be a scalar or complex."); 2686 } 2687 2688 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, 2689 bool IsPostfixUpdate, const Expr *V, 2690 const Expr *X, const Expr *E, 2691 const Expr *UE, bool IsXLHSInRHSPart, 2692 SourceLocation Loc) { 2693 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue"); 2694 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue"); 2695 RValue NewVVal; 2696 LValue VLValue = CGF.EmitLValue(V); 2697 LValue XLValue = CGF.EmitLValue(X); 2698 RValue ExprRValue = CGF.EmitAnyExpr(E); 2699 auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic; 2700 QualType NewVValType; 2701 if (UE) { 2702 // 'x' is updated with some additional value. 2703 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 2704 "Update expr in 'atomic capture' must be a binary operator."); 2705 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 2706 // Update expressions are allowed to have the following forms: 2707 // x binop= expr; -> xrval + expr; 2708 // x++, ++x -> xrval + 1; 2709 // x--, --x -> xrval - 1; 2710 // x = x binop expr; -> xrval binop expr 2711 // x = expr Op x; - > expr binop xrval; 2712 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 2713 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 2714 auto *XRValExpr = IsXLHSInRHSPart ? 
LHS : RHS; 2715 NewVValType = XRValExpr->getType(); 2716 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 2717 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, 2718 IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue { 2719 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 2720 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 2721 RValue Res = CGF.EmitAnyExpr(UE); 2722 NewVVal = IsPostfixUpdate ? XRValue : Res; 2723 return Res; 2724 }; 2725 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 2726 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 2727 if (Res.first) { 2728 // 'atomicrmw' instruction was generated. 2729 if (IsPostfixUpdate) { 2730 // Use old value from 'atomicrmw'. 2731 NewVVal = Res.second; 2732 } else { 2733 // 'atomicrmw' does not provide new value, so evaluate it using old 2734 // value of 'x'. 2735 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 2736 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 2737 NewVVal = CGF.EmitAnyExpr(UE); 2738 } 2739 } 2740 } else { 2741 // 'x' is simply rewritten with some 'expr'. 2742 NewVValType = X->getType().getNonReferenceType(); 2743 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 2744 X->getType().getNonReferenceType(), Loc); 2745 auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue { 2746 NewVVal = XRValue; 2747 return ExprRValue; 2748 }; 2749 // Try to perform atomicrmw xchg, otherwise simple exchange. 2750 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 2751 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 2752 Loc, Gen); 2753 if (Res.first) { 2754 // 'atomicrmw' instruction was generated. 2755 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 2756 } 2757 } 2758 // Emit post-update store to 'v' of old/new 'x' value. 
2759 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); 2760 // OpenMP, 2.12.6, atomic Construct 2761 // Any atomic construct with a seq_cst clause forces the atomically 2762 // performed operation to include an implicit flush operation without a 2763 // list. 2764 if (IsSeqCst) 2765 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2766 } 2767 2768 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 2769 bool IsSeqCst, bool IsPostfixUpdate, 2770 const Expr *X, const Expr *V, const Expr *E, 2771 const Expr *UE, bool IsXLHSInRHSPart, 2772 SourceLocation Loc) { 2773 switch (Kind) { 2774 case OMPC_read: 2775 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 2776 break; 2777 case OMPC_write: 2778 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 2779 break; 2780 case OMPC_unknown: 2781 case OMPC_update: 2782 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 2783 break; 2784 case OMPC_capture: 2785 EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 2786 IsXLHSInRHSPart, Loc); 2787 break; 2788 case OMPC_if: 2789 case OMPC_final: 2790 case OMPC_num_threads: 2791 case OMPC_private: 2792 case OMPC_firstprivate: 2793 case OMPC_lastprivate: 2794 case OMPC_reduction: 2795 case OMPC_safelen: 2796 case OMPC_simdlen: 2797 case OMPC_collapse: 2798 case OMPC_default: 2799 case OMPC_seq_cst: 2800 case OMPC_shared: 2801 case OMPC_linear: 2802 case OMPC_aligned: 2803 case OMPC_copyin: 2804 case OMPC_copyprivate: 2805 case OMPC_flush: 2806 case OMPC_proc_bind: 2807 case OMPC_schedule: 2808 case OMPC_ordered: 2809 case OMPC_nowait: 2810 case OMPC_untied: 2811 case OMPC_threadprivate: 2812 case OMPC_depend: 2813 case OMPC_mergeable: 2814 case OMPC_device: 2815 case OMPC_threads: 2816 case OMPC_simd: 2817 case OMPC_map: 2818 case OMPC_num_teams: 2819 case OMPC_thread_limit: 2820 case OMPC_priority: 2821 case OMPC_grainsize: 2822 case OMPC_nogroup: 2823 case OMPC_num_tasks: 2824 case OMPC_hint: 2825 case 
OMPC_dist_schedule: 2826 case OMPC_defaultmap: 2827 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 2828 } 2829 } 2830 2831 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 2832 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); 2833 OpenMPClauseKind Kind = OMPC_unknown; 2834 for (auto *C : S.clauses()) { 2835 // Find first clause (skip seq_cst clause, if it is first). 2836 if (C->getClauseKind() != OMPC_seq_cst) { 2837 Kind = C->getClauseKind(); 2838 break; 2839 } 2840 } 2841 2842 const auto *CS = 2843 S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 2844 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) { 2845 enterFullExpression(EWC); 2846 } 2847 // Processing for statements under 'atomic capture'. 2848 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 2849 for (const auto *C : Compound->body()) { 2850 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) { 2851 enterFullExpression(EWC); 2852 } 2853 } 2854 } 2855 2856 OMPLexicalScope Scope(*this, S); 2857 auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF) { 2858 CGF.EmitStopPoint(CS); 2859 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), 2860 S.getV(), S.getExpr(), S.getUpdateExpr(), 2861 S.isXLHSInRHSPart(), S.getLocStart()); 2862 }; 2863 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); 2864 } 2865 2866 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 2867 OMPLexicalScope Scope(*this, S); 2868 const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); 2869 2870 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 2871 GenerateOpenMPCapturedVars(CS, CapturedVars); 2872 2873 llvm::Function *Fn = nullptr; 2874 llvm::Constant *FnID = nullptr; 2875 2876 // Check if we have any if clause associated with the directive. 
2877 const Expr *IfCond = nullptr; 2878 2879 if (auto *C = S.getSingleClause<OMPIfClause>()) { 2880 IfCond = C->getCondition(); 2881 } 2882 2883 // Check if we have any device clause associated with the directive. 2884 const Expr *Device = nullptr; 2885 if (auto *C = S.getSingleClause<OMPDeviceClause>()) { 2886 Device = C->getDevice(); 2887 } 2888 2889 // Check if we have an if clause whose conditional always evaluates to false 2890 // or if we do not have any targets specified. If so the target region is not 2891 // an offload entry point. 2892 bool IsOffloadEntry = true; 2893 if (IfCond) { 2894 bool Val; 2895 if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) 2896 IsOffloadEntry = false; 2897 } 2898 if (CGM.getLangOpts().OMPTargetTriples.empty()) 2899 IsOffloadEntry = false; 2900 2901 assert(CurFuncDecl && "No parent declaration for target region!"); 2902 StringRef ParentName; 2903 // In case we have Ctors/Dtors we use the complete type variant to produce 2904 // the mangling of the device outlined kernel. 
2905 if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl)) 2906 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); 2907 else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl)) 2908 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); 2909 else 2910 ParentName = 2911 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl))); 2912 2913 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, 2914 IsOffloadEntry); 2915 2916 CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device, 2917 CapturedVars); 2918 } 2919 2920 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, 2921 const OMPExecutableDirective &S, 2922 OpenMPDirectiveKind InnermostKind, 2923 const RegionCodeGenTy &CodeGen) { 2924 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2925 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 2926 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 2927 auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). 2928 emitParallelOrTeamsOutlinedFunction(S, 2929 *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 2930 2931 const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S); 2932 const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>(); 2933 const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>(); 2934 if (NT || TL) { 2935 llvm::Value *NumTeamsVal = (NT) ? CGF.Builder.CreateIntCast( 2936 CGF.EmitScalarExpr(NT->getNumTeams()), CGF.CGM.Int32Ty, 2937 /* isSigned = */ true) : 2938 CGF.Builder.getInt32(0); 2939 2940 llvm::Value *ThreadLimitVal = (TL) ? 
CGF.Builder.CreateIntCast( 2941 CGF.EmitScalarExpr(TL->getThreadLimit()), CGF.CGM.Int32Ty, 2942 /* isSigned = */ true) : 2943 CGF.Builder.getInt32(0); 2944 2945 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeamsVal, 2946 ThreadLimitVal, S.getLocStart()); 2947 } 2948 2949 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, 2950 CapturedVars); 2951 } 2952 2953 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { 2954 LexicalScope Scope(*this, S.getSourceRange()); 2955 // Emit parallel region as a standalone region. 2956 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2957 OMPPrivateScope PrivateScope(CGF); 2958 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 2959 CGF.EmitOMPPrivateClause(S, PrivateScope); 2960 (void)PrivateScope.Privatize(); 2961 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2962 }; 2963 emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); 2964 } 2965 2966 void CodeGenFunction::EmitOMPCancellationPointDirective( 2967 const OMPCancellationPointDirective &S) { 2968 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), 2969 S.getCancelRegion()); 2970 } 2971 2972 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 2973 const Expr *IfCond = nullptr; 2974 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2975 if (C->getNameModifier() == OMPD_unknown || 2976 C->getNameModifier() == OMPD_cancel) { 2977 IfCond = C->getCondition(); 2978 break; 2979 } 2980 } 2981 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, 2982 S.getCancelRegion()); 2983 } 2984 2985 CodeGenFunction::JumpDest 2986 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 2987 if (Kind == OMPD_parallel || Kind == OMPD_task) 2988 return ReturnBlock; 2989 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 2990 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for); 2991 return 
BreakContinueStack.back().BreakBlock; 2992 } 2993 2994 // Generate the instructions for '#pragma omp target data' directive. 2995 void CodeGenFunction::EmitOMPTargetDataDirective( 2996 const OMPTargetDataDirective &S) { 2997 // emit the code inside the construct for now 2998 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2999 CGM.getOpenMPRuntime().emitInlinedDirective( 3000 *this, OMPD_target_data, 3001 [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); 3002 } 3003 3004 void CodeGenFunction::EmitOMPTargetEnterDataDirective( 3005 const OMPTargetEnterDataDirective &S) { 3006 // TODO: codegen for target enter data. 3007 } 3008 3009 void CodeGenFunction::EmitOMPTargetExitDataDirective( 3010 const OMPTargetExitDataDirective &S) { 3011 // TODO: codegen for target exit data. 3012 } 3013 3014 void CodeGenFunction::EmitOMPTargetParallelDirective( 3015 const OMPTargetParallelDirective &S) { 3016 // TODO: codegen for target parallel. 3017 } 3018 3019 void CodeGenFunction::EmitOMPTargetParallelForDirective( 3020 const OMPTargetParallelForDirective &S) { 3021 // TODO: codegen for target parallel for. 3022 } 3023 3024 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 3025 // emit the code inside the construct for now 3026 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 3027 CGM.getOpenMPRuntime().emitInlinedDirective( 3028 *this, OMPD_taskloop, 3029 [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); 3030 } 3031 3032 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 3033 const OMPTaskLoopSimdDirective &S) { 3034 // emit the code inside the construct for now 3035 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 3036 CGM.getOpenMPRuntime().emitInlinedDirective( 3037 *this, OMPD_taskloop_simd, 3038 [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); 3039 } 3040 3041