1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit OpenMP nodes as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCleanup.h" 15 #include "CGOpenMPRuntime.h" 16 #include "CodeGenFunction.h" 17 #include "CodeGenModule.h" 18 #include "TargetInfo.h" 19 #include "clang/AST/Stmt.h" 20 #include "clang/AST/StmtOpenMP.h" 21 using namespace clang; 22 using namespace CodeGen; 23 24 namespace { 25 /// Lexical scope for OpenMP executable constructs, that handles correct codegen 26 /// for captured expressions. 27 class OMPLexicalScope { 28 CodeGenFunction::LexicalScope Scope; 29 void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 30 for (const auto *C : S.clauses()) { 31 if (auto *CPI = OMPClauseWithPreInit::get(C)) { 32 if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { 33 for (const auto *I : PreInit->decls()) 34 CGF.EmitVarDecl(cast<VarDecl>(*I)); 35 } 36 } 37 } 38 } 39 40 class PostUpdateCleanup final : public EHScopeStack::Cleanup { 41 const OMPExecutableDirective &S; 42 43 public: 44 PostUpdateCleanup(const OMPExecutableDirective &S) : S(S) {} 45 46 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 47 if (!CGF.HaveInsertPoint()) 48 return; 49 (void)S; 50 // TODO: add cleanups for clauses that require post update. 
51 } 52 }; 53 54 public: 55 OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) 56 : Scope(CGF, S.getSourceRange()) { 57 emitPreInitStmt(CGF, S); 58 CGF.EHStack.pushCleanup<PostUpdateCleanup>(NormalAndEHCleanup, S); 59 } 60 }; 61 } // namespace 62 63 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { 64 auto &C = getContext(); 65 llvm::Value *Size = nullptr; 66 auto SizeInChars = C.getTypeSizeInChars(Ty); 67 if (SizeInChars.isZero()) { 68 // getTypeSizeInChars() returns 0 for a VLA. 69 while (auto *VAT = C.getAsVariableArrayType(Ty)) { 70 llvm::Value *ArraySize; 71 std::tie(ArraySize, Ty) = getVLASize(VAT); 72 Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize; 73 } 74 SizeInChars = C.getTypeSizeInChars(Ty); 75 if (SizeInChars.isZero()) 76 return llvm::ConstantInt::get(SizeTy, /*V=*/0); 77 Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); 78 } else 79 Size = CGM.getSize(SizeInChars); 80 return Size; 81 } 82 83 void CodeGenFunction::GenerateOpenMPCapturedVars( 84 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { 85 const RecordDecl *RD = S.getCapturedRecordDecl(); 86 auto CurField = RD->field_begin(); 87 auto CurCap = S.captures().begin(); 88 for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), 89 E = S.capture_init_end(); 90 I != E; ++I, ++CurField, ++CurCap) { 91 if (CurField->hasCapturedVLAType()) { 92 auto VAT = CurField->getCapturedVLAType(); 93 auto *Val = VLASizeMap[VAT->getSizeExpr()]; 94 CapturedVars.push_back(Val); 95 } else if (CurCap->capturesThis()) 96 CapturedVars.push_back(CXXThisValue); 97 else if (CurCap->capturesVariableByCopy()) 98 CapturedVars.push_back( 99 EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal()); 100 else { 101 assert(CurCap->capturesVariable() && "Expected capture by reference."); 102 CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer()); 103 } 104 } 105 } 106 107 static Address castValueFromUintptr(CodeGenFunction 
&CGF, QualType DstType, 108 StringRef Name, LValue AddrLV, 109 bool isReferenceType = false) { 110 ASTContext &Ctx = CGF.getContext(); 111 112 auto *CastedPtr = CGF.EmitScalarConversion( 113 AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(), 114 Ctx.getPointerType(DstType), SourceLocation()); 115 auto TmpAddr = 116 CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) 117 .getAddress(); 118 119 // If we are dealing with references we need to return the address of the 120 // reference instead of the reference of the value. 121 if (isReferenceType) { 122 QualType RefType = Ctx.getLValueReferenceType(DstType); 123 auto *RefVal = TmpAddr.getPointer(); 124 TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref"); 125 auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType); 126 CGF.EmitScalarInit(RefVal, TmpLVal); 127 } 128 129 return TmpAddr; 130 } 131 132 llvm::Function * 133 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { 134 assert( 135 CapturedStmtInfo && 136 "CapturedStmtInfo should be set when generating the captured function"); 137 const CapturedDecl *CD = S.getCapturedDecl(); 138 const RecordDecl *RD = S.getCapturedRecordDecl(); 139 assert(CD->hasBody() && "missing CapturedDecl body"); 140 141 // Build the argument list. 142 ASTContext &Ctx = CGM.getContext(); 143 FunctionArgList Args; 144 Args.append(CD->param_begin(), 145 std::next(CD->param_begin(), CD->getContextParamPosition())); 146 auto I = S.captures().begin(); 147 for (auto *FD : RD->fields()) { 148 QualType ArgType = FD->getType(); 149 IdentifierInfo *II = nullptr; 150 VarDecl *CapVar = nullptr; 151 152 // If this is a capture by copy and the type is not a pointer, the outlined 153 // function argument type should be uintptr and the value properly casted to 154 // uintptr. This is necessary given that the runtime library is only able to 155 // deal with pointers. We can pass in the same way the VLA type sizes to the 156 // outlined function. 
157 if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || 158 I->capturesVariableArrayType()) 159 ArgType = Ctx.getUIntPtrType(); 160 161 if (I->capturesVariable() || I->capturesVariableByCopy()) { 162 CapVar = I->getCapturedVar(); 163 II = CapVar->getIdentifier(); 164 } else if (I->capturesThis()) 165 II = &getContext().Idents.get("this"); 166 else { 167 assert(I->capturesVariableArrayType()); 168 II = &getContext().Idents.get("vla"); 169 } 170 if (ArgType->isVariablyModifiedType()) 171 ArgType = getContext().getVariableArrayDecayedType(ArgType); 172 Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr, 173 FD->getLocation(), II, ArgType)); 174 ++I; 175 } 176 Args.append( 177 std::next(CD->param_begin(), CD->getContextParamPosition() + 1), 178 CD->param_end()); 179 180 // Create the function declaration. 181 FunctionType::ExtInfo ExtInfo; 182 const CGFunctionInfo &FuncInfo = 183 CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo, 184 /*IsVariadic=*/false); 185 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); 186 187 llvm::Function *F = llvm::Function::Create( 188 FuncLLVMTy, llvm::GlobalValue::InternalLinkage, 189 CapturedStmtInfo->getHelperName(), &CGM.getModule()); 190 CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); 191 if (CD->isNothrow()) 192 F->addFnAttr(llvm::Attribute::NoUnwind); 193 194 // Generate the function. 195 StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), 196 CD->getBody()->getLocStart()); 197 unsigned Cnt = CD->getContextParamPosition(); 198 I = S.captures().begin(); 199 for (auto *FD : RD->fields()) { 200 // If we are capturing a pointer by copy we don't need to do anything, just 201 // use the value that we get from the arguments. 
202 if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { 203 setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt])); 204 ++Cnt; 205 ++I; 206 continue; 207 } 208 209 LValue ArgLVal = 210 MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(), 211 AlignmentSource::Decl); 212 if (FD->hasCapturedVLAType()) { 213 LValue CastedArgLVal = 214 MakeAddrLValue(castValueFromUintptr(*this, FD->getType(), 215 Args[Cnt]->getName(), ArgLVal), 216 FD->getType(), AlignmentSource::Decl); 217 auto *ExprArg = 218 EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal(); 219 auto VAT = FD->getCapturedVLAType(); 220 VLASizeMap[VAT->getSizeExpr()] = ExprArg; 221 } else if (I->capturesVariable()) { 222 auto *Var = I->getCapturedVar(); 223 QualType VarTy = Var->getType(); 224 Address ArgAddr = ArgLVal.getAddress(); 225 if (!VarTy->isReferenceType()) { 226 ArgAddr = EmitLoadOfReference( 227 ArgAddr, ArgLVal.getType()->castAs<ReferenceType>()); 228 } 229 setAddrOfLocalVar( 230 Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var))); 231 } else if (I->capturesVariableByCopy()) { 232 assert(!FD->getType()->isAnyPointerType() && 233 "Not expecting a captured pointer."); 234 auto *Var = I->getCapturedVar(); 235 QualType VarTy = Var->getType(); 236 setAddrOfLocalVar(I->getCapturedVar(), 237 castValueFromUintptr(*this, FD->getType(), 238 Args[Cnt]->getName(), ArgLVal, 239 VarTy->isReferenceType())); 240 } else { 241 // If 'this' is captured, load it into CXXThisValue. 
242 assert(I->capturesThis()); 243 CXXThisValue = 244 EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal(); 245 } 246 ++Cnt; 247 ++I; 248 } 249 250 PGO.assignRegionCounters(GlobalDecl(CD), F); 251 CapturedStmtInfo->EmitBody(*this, CD->getBody()); 252 FinishFunction(CD->getBodyRBrace()); 253 254 return F; 255 } 256 257 //===----------------------------------------------------------------------===// 258 // OpenMP Directive Emission 259 //===----------------------------------------------------------------------===// 260 void CodeGenFunction::EmitOMPAggregateAssign( 261 Address DestAddr, Address SrcAddr, QualType OriginalType, 262 const llvm::function_ref<void(Address, Address)> &CopyGen) { 263 // Perform element-by-element initialization. 264 QualType ElementTy; 265 266 // Drill down to the base element type on both arrays. 267 auto ArrayTy = OriginalType->getAsArrayTypeUnsafe(); 268 auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr); 269 SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 270 271 auto SrcBegin = SrcAddr.getPointer(); 272 auto DestBegin = DestAddr.getPointer(); 273 // Cast from pointer to array type to pointer to single element. 274 auto DestEnd = Builder.CreateGEP(DestBegin, NumElements); 275 // The basic structure here is a while-do loop. 276 auto BodyBB = createBasicBlock("omp.arraycpy.body"); 277 auto DoneBB = createBasicBlock("omp.arraycpy.done"); 278 auto IsEmpty = 279 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); 280 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 281 282 // Enter the loop body, making that address the current address. 
283 auto EntryBB = Builder.GetInsertBlock(); 284 EmitBlock(BodyBB); 285 286 CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy); 287 288 llvm::PHINode *SrcElementPHI = 289 Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 290 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 291 Address SrcElementCurrent = 292 Address(SrcElementPHI, 293 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 294 295 llvm::PHINode *DestElementPHI = 296 Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 297 DestElementPHI->addIncoming(DestBegin, EntryBB); 298 Address DestElementCurrent = 299 Address(DestElementPHI, 300 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 301 302 // Emit copy. 303 CopyGen(DestElementCurrent, SrcElementCurrent); 304 305 // Shift the address forward by one element. 306 auto DestElementNext = Builder.CreateConstGEP1_32( 307 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 308 auto SrcElementNext = Builder.CreateConstGEP1_32( 309 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 310 // Check whether we've reached the end. 311 auto Done = 312 Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 313 Builder.CreateCondBr(Done, DoneBB, BodyBB); 314 DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock()); 315 SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock()); 316 317 // Done. 318 EmitBlock(DoneBB, /*IsFinished=*/true); 319 } 320 321 /// \brief Emit initialization of arrays of complex types. 322 /// \param DestAddr Address of the array. 323 /// \param Type Type of array. 324 /// \param Init Initial expression of array. 325 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 326 QualType Type, const Expr *Init) { 327 // Perform element-by-element initialization. 328 QualType ElementTy; 329 330 // Drill down to the base element type on both arrays. 
331 auto ArrayTy = Type->getAsArrayTypeUnsafe(); 332 auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 333 DestAddr = 334 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 335 336 auto DestBegin = DestAddr.getPointer(); 337 // Cast from pointer to array type to pointer to single element. 338 auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 339 // The basic structure here is a while-do loop. 340 auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 341 auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 342 auto IsEmpty = 343 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 344 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 345 346 // Enter the loop body, making that address the current address. 347 auto EntryBB = CGF.Builder.GetInsertBlock(); 348 CGF.EmitBlock(BodyBB); 349 350 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 351 352 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 353 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 354 DestElementPHI->addIncoming(DestBegin, EntryBB); 355 Address DestElementCurrent = 356 Address(DestElementPHI, 357 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 358 359 // Emit copy. 360 { 361 CodeGenFunction::RunCleanupsScope InitScope(CGF); 362 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 363 /*IsInitializer=*/false); 364 } 365 366 // Shift the address forward by one element. 367 auto DestElementNext = CGF.Builder.CreateConstGEP1_32( 368 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 369 // Check whether we've reached the end. 370 auto Done = 371 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 372 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 373 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 374 375 // Done. 
376 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 377 } 378 379 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, 380 Address SrcAddr, const VarDecl *DestVD, 381 const VarDecl *SrcVD, const Expr *Copy) { 382 if (OriginalType->isArrayType()) { 383 auto *BO = dyn_cast<BinaryOperator>(Copy); 384 if (BO && BO->getOpcode() == BO_Assign) { 385 // Perform simple memcpy for simple copying. 386 EmitAggregateAssign(DestAddr, SrcAddr, OriginalType); 387 } else { 388 // For arrays with complex element types perform element by element 389 // copying. 390 EmitOMPAggregateAssign( 391 DestAddr, SrcAddr, OriginalType, 392 [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) { 393 // Working with the single array element, so have to remap 394 // destination and source variables to corresponding array 395 // elements. 396 CodeGenFunction::OMPPrivateScope Remap(*this); 397 Remap.addPrivate(DestVD, [DestElement]() -> Address { 398 return DestElement; 399 }); 400 Remap.addPrivate( 401 SrcVD, [SrcElement]() -> Address { return SrcElement; }); 402 (void)Remap.Privatize(); 403 EmitIgnoredExpr(Copy); 404 }); 405 } 406 } else { 407 // Remap pseudo source variable to private copy. 408 CodeGenFunction::OMPPrivateScope Remap(*this); 409 Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; }); 410 Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; }); 411 (void)Remap.Privatize(); 412 // Emit copying of the whole variable. 
413 EmitIgnoredExpr(Copy); 414 } 415 } 416 417 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, 418 OMPPrivateScope &PrivateScope) { 419 if (!HaveInsertPoint()) 420 return false; 421 bool FirstprivateIsLastprivate = false; 422 llvm::DenseSet<const VarDecl *> Lastprivates; 423 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 424 for (const auto *D : C->varlists()) 425 Lastprivates.insert( 426 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); 427 } 428 llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; 429 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { 430 auto IRef = C->varlist_begin(); 431 auto InitsRef = C->inits().begin(); 432 for (auto IInit : C->private_copies()) { 433 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 434 FirstprivateIsLastprivate = 435 FirstprivateIsLastprivate || 436 (Lastprivates.count(OrigVD->getCanonicalDecl()) > 0); 437 if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { 438 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 439 auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); 440 bool IsRegistered; 441 DeclRefExpr DRE( 442 const_cast<VarDecl *>(OrigVD), 443 /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( 444 OrigVD) != nullptr, 445 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 446 Address OriginalAddr = EmitLValue(&DRE).getAddress(); 447 QualType Type = OrigVD->getType(); 448 if (Type->isArrayType()) { 449 // Emit VarDecl with copy init for arrays. 450 // Get the address of the original variable captured in current 451 // captured region. 452 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 453 auto Emission = EmitAutoVarAlloca(*VD); 454 auto *Init = VD->getInit(); 455 if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) { 456 // Perform simple memcpy. 
457 EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr, 458 Type); 459 } else { 460 EmitOMPAggregateAssign( 461 Emission.getAllocatedAddress(), OriginalAddr, Type, 462 [this, VDInit, Init](Address DestElement, 463 Address SrcElement) { 464 // Clean up any temporaries needed by the initialization. 465 RunCleanupsScope InitScope(*this); 466 // Emit initialization for single element. 467 setAddrOfLocalVar(VDInit, SrcElement); 468 EmitAnyExprToMem(Init, DestElement, 469 Init->getType().getQualifiers(), 470 /*IsInitializer*/ false); 471 LocalDeclMap.erase(VDInit); 472 }); 473 } 474 EmitAutoVarCleanups(Emission); 475 return Emission.getAllocatedAddress(); 476 }); 477 } else { 478 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 479 // Emit private VarDecl with copy init. 480 // Remap temp VDInit variable to the address of the original 481 // variable 482 // (for proper handling of captured global variables). 483 setAddrOfLocalVar(VDInit, OriginalAddr); 484 EmitDecl(*VD); 485 LocalDeclMap.erase(VDInit); 486 return GetAddrOfLocalVar(VD); 487 }); 488 } 489 assert(IsRegistered && 490 "firstprivate var already registered as private"); 491 // Silence the warning about unused variable. 
492 (void)IsRegistered; 493 } 494 ++IRef; 495 ++InitsRef; 496 } 497 } 498 return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty(); 499 } 500 501 void CodeGenFunction::EmitOMPPrivateClause( 502 const OMPExecutableDirective &D, 503 CodeGenFunction::OMPPrivateScope &PrivateScope) { 504 if (!HaveInsertPoint()) 505 return; 506 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 507 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) { 508 auto IRef = C->varlist_begin(); 509 for (auto IInit : C->private_copies()) { 510 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 511 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 512 auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 513 bool IsRegistered = 514 PrivateScope.addPrivate(OrigVD, [&]() -> Address { 515 // Emit private VarDecl with copy init. 516 EmitDecl(*VD); 517 return GetAddrOfLocalVar(VD); 518 }); 519 assert(IsRegistered && "private var already registered as private"); 520 // Silence the warning about unused variable. 521 (void)IsRegistered; 522 } 523 ++IRef; 524 } 525 } 526 } 527 528 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { 529 if (!HaveInsertPoint()) 530 return false; 531 // threadprivate_var1 = master_threadprivate_var1; 532 // operator=(threadprivate_var2, master_threadprivate_var2); 533 // ... 534 // __kmpc_barrier(&loc, global_tid); 535 llvm::DenseSet<const VarDecl *> CopiedVars; 536 llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; 537 for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) { 538 auto IRef = C->varlist_begin(); 539 auto ISrcRef = C->source_exprs().begin(); 540 auto IDestRef = C->destination_exprs().begin(); 541 for (auto *AssignOp : C->assignment_ops()) { 542 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 543 QualType Type = VD->getType(); 544 if (CopiedVars.insert(VD->getCanonicalDecl()).second) { 545 // Get the address of the master variable. 
If we are emitting code with 546 // TLS support, the address is passed from the master as field in the 547 // captured declaration. 548 Address MasterAddr = Address::invalid(); 549 if (getLangOpts().OpenMPUseTLS && 550 getContext().getTargetInfo().isTLSSupported()) { 551 assert(CapturedStmtInfo->lookup(VD) && 552 "Copyin threadprivates should have been captured!"); 553 DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(), 554 VK_LValue, (*IRef)->getExprLoc()); 555 MasterAddr = EmitLValue(&DRE).getAddress(); 556 LocalDeclMap.erase(VD); 557 } else { 558 MasterAddr = 559 Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) 560 : CGM.GetAddrOfGlobal(VD), 561 getContext().getDeclAlign(VD)); 562 } 563 // Get the address of the threadprivate variable. 564 Address PrivateAddr = EmitLValue(*IRef).getAddress(); 565 if (CopiedVars.size() == 1) { 566 // At first check if current thread is a master thread. If it is, no 567 // need to copy data. 568 CopyBegin = createBasicBlock("copyin.not.master"); 569 CopyEnd = createBasicBlock("copyin.not.master.end"); 570 Builder.CreateCondBr( 571 Builder.CreateICmpNE( 572 Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy), 573 Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)), 574 CopyBegin, CopyEnd); 575 EmitBlock(CopyBegin); 576 } 577 auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 578 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 579 EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp); 580 } 581 ++IRef; 582 ++ISrcRef; 583 ++IDestRef; 584 } 585 } 586 if (CopyEnd) { 587 // Exit out of copying procedure for non-master thread. 
588 EmitBlock(CopyEnd, /*IsFinished=*/true); 589 return true; 590 } 591 return false; 592 } 593 594 bool CodeGenFunction::EmitOMPLastprivateClauseInit( 595 const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { 596 if (!HaveInsertPoint()) 597 return false; 598 bool HasAtLeastOneLastprivate = false; 599 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; 600 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 601 HasAtLeastOneLastprivate = true; 602 auto IRef = C->varlist_begin(); 603 auto IDestRef = C->destination_exprs().begin(); 604 for (auto *IInit : C->private_copies()) { 605 // Keep the address of the original variable for future update at the end 606 // of the loop. 607 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 608 if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { 609 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 610 PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address { 611 DeclRefExpr DRE( 612 const_cast<VarDecl *>(OrigVD), 613 /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( 614 OrigVD) != nullptr, 615 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 616 return EmitLValue(&DRE).getAddress(); 617 }); 618 // Check if the variable is also a firstprivate: in this case IInit is 619 // not generated. Initialization of this variable will happen in codegen 620 // for 'firstprivate' clause. 621 if (IInit) { 622 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 623 bool IsRegistered = 624 PrivateScope.addPrivate(OrigVD, [&]() -> Address { 625 // Emit private VarDecl with copy init. 
626 EmitDecl(*VD); 627 return GetAddrOfLocalVar(VD); 628 }); 629 assert(IsRegistered && 630 "lastprivate var already registered as private"); 631 (void)IsRegistered; 632 } 633 } 634 ++IRef; 635 ++IDestRef; 636 } 637 } 638 return HasAtLeastOneLastprivate; 639 } 640 641 void CodeGenFunction::EmitOMPLastprivateClauseFinal( 642 const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) { 643 if (!HaveInsertPoint()) 644 return; 645 // Emit following code: 646 // if (<IsLastIterCond>) { 647 // orig_var1 = private_orig_var1; 648 // ... 649 // orig_varn = private_orig_varn; 650 // } 651 llvm::BasicBlock *ThenBB = nullptr; 652 llvm::BasicBlock *DoneBB = nullptr; 653 if (IsLastIterCond) { 654 ThenBB = createBasicBlock(".omp.lastprivate.then"); 655 DoneBB = createBasicBlock(".omp.lastprivate.done"); 656 Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB); 657 EmitBlock(ThenBB); 658 } 659 llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates; 660 if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) { 661 auto IC = LoopDirective->counters().begin(); 662 for (auto F : LoopDirective->finals()) { 663 auto *D = cast<DeclRefExpr>(*IC)->getDecl()->getCanonicalDecl(); 664 LoopCountersAndUpdates[D] = F; 665 ++IC; 666 } 667 } 668 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; 669 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 670 auto IRef = C->varlist_begin(); 671 auto ISrcRef = C->source_exprs().begin(); 672 auto IDestRef = C->destination_exprs().begin(); 673 for (auto *AssignOp : C->assignment_ops()) { 674 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 675 QualType Type = PrivateVD->getType(); 676 auto *CanonicalVD = PrivateVD->getCanonicalDecl(); 677 if (AlreadyEmittedVars.insert(CanonicalVD).second) { 678 // If lastprivate variable is a loop control variable for loop-based 679 // directive, update its value before copyin back to original 680 // variable. 
681 if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) 682 EmitIgnoredExpr(UpExpr); 683 auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 684 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 685 // Get the address of the original variable. 686 Address OriginalAddr = GetAddrOfLocalVar(DestVD); 687 // Get the address of the private variable. 688 Address PrivateAddr = GetAddrOfLocalVar(PrivateVD); 689 if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>()) 690 PrivateAddr = 691 Address(Builder.CreateLoad(PrivateAddr), 692 getNaturalTypeAlignment(RefTy->getPointeeType())); 693 EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp); 694 } 695 ++IRef; 696 ++ISrcRef; 697 ++IDestRef; 698 } 699 } 700 if (IsLastIterCond) 701 EmitBlock(DoneBB, /*IsFinished=*/true); 702 } 703 704 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 705 LValue BaseLV, llvm::Value *Addr) { 706 Address Tmp = Address::invalid(); 707 Address TopTmp = Address::invalid(); 708 Address MostTopTmp = Address::invalid(); 709 BaseTy = BaseTy.getNonReferenceType(); 710 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 711 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 712 Tmp = CGF.CreateMemTemp(BaseTy); 713 if (TopTmp.isValid()) 714 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 715 else 716 MostTopTmp = Tmp; 717 TopTmp = Tmp; 718 BaseTy = BaseTy->getPointeeType(); 719 } 720 llvm::Type *Ty = BaseLV.getPointer()->getType(); 721 if (Tmp.isValid()) 722 Ty = Tmp.getElementType(); 723 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 724 if (Tmp.isValid()) { 725 CGF.Builder.CreateStore(Addr, Tmp); 726 return MostTopTmp; 727 } 728 return Address(Addr, BaseLV.getAlignment()); 729 } 730 731 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 732 LValue BaseLV) { 733 BaseTy = BaseTy.getNonReferenceType(); 734 while ((BaseTy->isPointerType() || 
BaseTy->isReferenceType()) && 735 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 736 if (auto *PtrTy = BaseTy->getAs<PointerType>()) 737 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); 738 else { 739 BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(), 740 BaseTy->castAs<ReferenceType>()); 741 } 742 BaseTy = BaseTy->getPointeeType(); 743 } 744 return CGF.MakeAddrLValue( 745 Address( 746 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 747 BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()), 748 BaseLV.getAlignment()), 749 BaseLV.getType(), BaseLV.getAlignmentSource()); 750 } 751 752 void CodeGenFunction::EmitOMPReductionClauseInit( 753 const OMPExecutableDirective &D, 754 CodeGenFunction::OMPPrivateScope &PrivateScope) { 755 if (!HaveInsertPoint()) 756 return; 757 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 758 auto ILHS = C->lhs_exprs().begin(); 759 auto IRHS = C->rhs_exprs().begin(); 760 auto IPriv = C->privates().begin(); 761 for (auto IRef : C->varlists()) { 762 auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 763 auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 764 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); 765 if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) { 766 auto *Base = OASE->getBase()->IgnoreParenImpCasts(); 767 while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 768 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 769 while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 770 Base = TempASE->getBase()->IgnoreParenImpCasts(); 771 auto *DE = cast<DeclRefExpr>(Base); 772 auto *OrigVD = cast<VarDecl>(DE->getDecl()); 773 auto OASELValueLB = EmitOMPArraySectionExpr(OASE); 774 auto OASELValueUB = 775 EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 776 auto OriginalBaseLValue = EmitLValue(DE); 777 LValue BaseLValue = 778 loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(), 779 OriginalBaseLValue); 780 
// Store the address of the original variable associated with the LHS 781 // implicit variable. 782 PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address { 783 return OASELValueLB.getAddress(); 784 }); 785 // Emit reduction copy. 786 bool IsRegistered = PrivateScope.addPrivate( 787 OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB, 788 OASELValueUB, OriginalBaseLValue]() -> Address { 789 // Emit VarDecl with copy init for arrays. 790 // Get the address of the original variable captured in current 791 // captured region. 792 auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(), 793 OASELValueLB.getPointer()); 794 Size = Builder.CreateNUWAdd( 795 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 796 CodeGenFunction::OpaqueValueMapping OpaqueMap( 797 *this, cast<OpaqueValueExpr>( 798 getContext() 799 .getAsVariableArrayType(PrivateVD->getType()) 800 ->getSizeExpr()), 801 RValue::get(Size)); 802 EmitVariablyModifiedType(PrivateVD->getType()); 803 auto Emission = EmitAutoVarAlloca(*PrivateVD); 804 auto Addr = Emission.getAllocatedAddress(); 805 auto *Init = PrivateVD->getInit(); 806 EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init); 807 EmitAutoVarCleanups(Emission); 808 // Emit private VarDecl with reduction init. 809 auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), 810 OASELValueLB.getPointer()); 811 auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); 812 return castToBase(*this, OrigVD->getType(), 813 OASELValueLB.getType(), OriginalBaseLValue, 814 Ptr); 815 }); 816 assert(IsRegistered && "private var already registered as private"); 817 // Silence the warning about unused variable. 
818 (void)IsRegistered; 819 PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { 820 return GetAddrOfLocalVar(PrivateVD); 821 }); 822 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) { 823 auto *Base = ASE->getBase()->IgnoreParenImpCasts(); 824 while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 825 Base = TempASE->getBase()->IgnoreParenImpCasts(); 826 auto *DE = cast<DeclRefExpr>(Base); 827 auto *OrigVD = cast<VarDecl>(DE->getDecl()); 828 auto ASELValue = EmitLValue(ASE); 829 auto OriginalBaseLValue = EmitLValue(DE); 830 LValue BaseLValue = loadToBegin( 831 *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue); 832 // Store the address of the original variable associated with the LHS 833 // implicit variable. 834 PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address { 835 return ASELValue.getAddress(); 836 }); 837 // Emit reduction copy. 838 bool IsRegistered = PrivateScope.addPrivate( 839 OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue, 840 OriginalBaseLValue]() -> Address { 841 // Emit private VarDecl with reduction init. 842 EmitDecl(*PrivateVD); 843 auto Addr = GetAddrOfLocalVar(PrivateVD); 844 auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), 845 ASELValue.getPointer()); 846 auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); 847 return castToBase(*this, OrigVD->getType(), ASELValue.getType(), 848 OriginalBaseLValue, Ptr); 849 }); 850 assert(IsRegistered && "private var already registered as private"); 851 // Silence the warning about unused variable. 
852 (void)IsRegistered; 853 PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { 854 return Builder.CreateElementBitCast( 855 GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()), 856 "rhs.begin"); 857 }); 858 } else { 859 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); 860 QualType Type = PrivateVD->getType(); 861 if (getContext().getAsArrayType(Type)) { 862 // Store the address of the original variable associated with the LHS 863 // implicit variable. 864 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 865 CapturedStmtInfo->lookup(OrigVD) != nullptr, 866 IRef->getType(), VK_LValue, IRef->getExprLoc()); 867 Address OriginalAddr = EmitLValue(&DRE).getAddress(); 868 PrivateScope.addPrivate(LHSVD, [this, OriginalAddr, 869 LHSVD]() -> Address { 870 return Builder.CreateElementBitCast( 871 OriginalAddr, ConvertTypeForMem(LHSVD->getType()), 872 "lhs.begin"); 873 }); 874 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 875 if (Type->isVariablyModifiedType()) { 876 CodeGenFunction::OpaqueValueMapping OpaqueMap( 877 *this, cast<OpaqueValueExpr>( 878 getContext() 879 .getAsVariableArrayType(PrivateVD->getType()) 880 ->getSizeExpr()), 881 RValue::get( 882 getTypeSize(OrigVD->getType().getNonReferenceType()))); 883 EmitVariablyModifiedType(Type); 884 } 885 auto Emission = EmitAutoVarAlloca(*PrivateVD); 886 auto Addr = Emission.getAllocatedAddress(); 887 auto *Init = PrivateVD->getInit(); 888 EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init); 889 EmitAutoVarCleanups(Emission); 890 return Emission.getAllocatedAddress(); 891 }); 892 assert(IsRegistered && "private var already registered as private"); 893 // Silence the warning about unused variable. 
894 (void)IsRegistered; 895 PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { 896 return Builder.CreateElementBitCast( 897 GetAddrOfLocalVar(PrivateVD), 898 ConvertTypeForMem(RHSVD->getType()), "rhs.begin"); 899 }); 900 } else { 901 // Store the address of the original variable associated with the LHS 902 // implicit variable. 903 PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> Address { 904 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 905 CapturedStmtInfo->lookup(OrigVD) != nullptr, 906 IRef->getType(), VK_LValue, IRef->getExprLoc()); 907 return EmitLValue(&DRE).getAddress(); 908 }); 909 // Emit reduction copy. 910 bool IsRegistered = 911 PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> Address { 912 // Emit private VarDecl with reduction init. 913 EmitDecl(*PrivateVD); 914 return GetAddrOfLocalVar(PrivateVD); 915 }); 916 assert(IsRegistered && "private var already registered as private"); 917 // Silence the warning about unused variable. 918 (void)IsRegistered; 919 PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { 920 return GetAddrOfLocalVar(PrivateVD); 921 }); 922 } 923 } 924 ++ILHS; 925 ++IRHS; 926 ++IPriv; 927 } 928 } 929 } 930 931 void CodeGenFunction::EmitOMPReductionClauseFinal( 932 const OMPExecutableDirective &D) { 933 if (!HaveInsertPoint()) 934 return; 935 llvm::SmallVector<const Expr *, 8> Privates; 936 llvm::SmallVector<const Expr *, 8> LHSExprs; 937 llvm::SmallVector<const Expr *, 8> RHSExprs; 938 llvm::SmallVector<const Expr *, 8> ReductionOps; 939 bool HasAtLeastOneReduction = false; 940 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 941 HasAtLeastOneReduction = true; 942 Privates.append(C->privates().begin(), C->privates().end()); 943 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 944 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 945 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 946 } 947 if (HasAtLeastOneReduction) 
{ 948 // Emit nowait reduction if nowait clause is present or directive is a 949 // parallel directive (it always has implicit barrier). 950 CGM.getOpenMPRuntime().emitReduction( 951 *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps, 952 D.getSingleClause<OMPNowaitClause>() || 953 isOpenMPParallelDirective(D.getDirectiveKind()) || 954 D.getDirectiveKind() == OMPD_simd, 955 D.getDirectiveKind() == OMPD_simd); 956 } 957 } 958 959 static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, 960 const OMPExecutableDirective &S, 961 OpenMPDirectiveKind InnermostKind, 962 const RegionCodeGenTy &CodeGen) { 963 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 964 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 965 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 966 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( 967 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 968 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { 969 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 970 auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 971 /*IgnoreResultAssign*/ true); 972 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( 973 CGF, NumThreads, NumThreadsClause->getLocStart()); 974 } 975 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { 976 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 977 CGF.CGM.getOpenMPRuntime().emitProcBindClause( 978 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart()); 979 } 980 const Expr *IfCond = nullptr; 981 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 982 if (C->getNameModifier() == OMPD_unknown || 983 C->getNameModifier() == OMPD_parallel) { 984 IfCond = C->getCondition(); 985 break; 986 } 987 } 988 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, 989 CapturedVars, IfCond); 990 } 991 992 void 
CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  OMPLexicalScope Scope(*this, S);
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // while the master thread's values of threadprivate variables are
      // propagated to the local instances of those variables in all other
      // implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
}

/// Emit one iteration of the loop associated with \p D: the counter updates,
/// the updates of the linear-clause variables and then the loop body.
/// 'break' inside the body jumps to \p LoopExit; 'continue' jumps to the
/// "omp.body.continue" block emitted after the body.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto U : C->updates()) {
      EmitIgnoredExpr(U);
    }
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

/// Emit the skeleton of an inner loop: condition block, body, increment and
/// back-edge.
///
/// \param S Statement used for the profile counters.
/// \param RequiresCleanup When true, the loop exit is staged through a
/// separate block that branches through the pending cleanups.
/// \param LoopCond Loop continuation condition.
/// \param IncExpr Increment expression emitted after the body.
/// \param BodyGen Callback that emits the loop body.
/// \param PostIncGen Callback invoked right after \p IncExpr is emitted.
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

/// Emit the helper variables of the linear clauses of \p D: each init
/// variable is allocated and initialized from a reference to the original
/// variable, and a pre-calculated step (if the clause has one) is emitted.
/// Does nothing when there is no insert point.
void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return;
  // Emit inits for the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto Init : C->inits()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      auto *OrigVD = cast<VarDecl>(
          cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())->getDecl());
      // Build a reference to the original variable; the second argument tells
      // whether the variable is found in the current captured-statement info.
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      VD->getInit()->getType(), VK_LValue,
                      VD->getInit()->getExprLoc());
      AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
      EmitExprAsInit(&DRE, VD,
                     MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
                     /*capturedByInit=*/false);
      EmitAutoVarCleanups(Emission);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
}

/// Emit the final-value expressions of the linear clauses of \p D. While each
/// expression is emitted, the original variable is mapped to its own address
/// through a temporary private scope. Does nothing when there is no insert
/// point.
static void emitLinearClauseFinal(CodeGenFunction &CGF,
                                  const OMPLoopDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto F : C->finals()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = CGF.EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(CGF);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      CGF.EmitIgnoredExpr(F);
      ++IC;
    }
  }
}

/// Emit alignment assumptions for the variables listed in the aligned clauses
/// of \p D. If a clause carries no alignment, the default SIMD alignment for
/// the pointee type is used; a resulting alignment of zero emits nothing.
/// Does nothing when there is no insert point.
static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      // The emitted alignment value is required to be a constant integer.
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined default
        // alignments for SIMD instructions on the target platforms are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

/// Register private storage for the loop counters in \p LoopScope. For each
/// pair (counter, private counter) the private variable is allocated without
/// initialization, and both declarations are mapped to that address.
/// Does nothing when there is no insert point.
static void emitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters,
                                    ArrayRef<Expr *> PrivateCounters) {
  if (!CGF.HaveInsertPoint())
    return;
  auto I = PrivateCounters.begin();
  for (auto *E : Counters) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Written by the first callback and returned by the second one.
    Address Addr = Address::invalid();
    (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
      // Emit var without initialization.
      auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD);
      CGF.EmitAutoVarCleanups(VarEmission);
      Addr = VarEmission.getAllocatedAddress();
      return Addr;
    });
    (void)LoopScope.addPrivate(VD, [&]() -> Address { return Addr; });
    ++I;
  }
}

/// Emit the loop precondition check: inside a temporary private scope the
/// loop counters are privatized and the init expressions of \p S are emitted;
/// afterwards a branch on \p Cond to \p TrueBlock / \p FalseBlock is emitted
/// with \p TrueCount as the profile count. Does nothing when there is no
/// insert point.
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    emitPrivateLoopCounters(CGF, PreCondScope, S.counters(),
                            S.private_counters());
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (auto I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Check that loop is executed at least one time.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

/// Register the private copies of the variables listed in the linear clauses
/// of \p D in \p PrivateScope. Does nothing when there is no insert point.
static void
emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
                      CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
        // Emit private VarDecl with copy init.
        CGF.EmitVarDecl(*PrivateVD);
        return CGF.GetAddrOfLocalVar(PrivateVD);
      });
      assert(IsRegistered && "linear var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++CurPrivate;
    }
  }
}

/// Translate the simdlen/safelen clauses of \p D into loop attributes.
///
/// With a simdlen clause the vectorize width is set to its value and, unless
/// \p IsMonotonic, the loop is marked parallel only if no safelen clause is
/// present. Otherwise, with a safelen clause, the vectorize width is set to
/// its value and the loop is marked non-parallel. Both clause values must
/// evaluate to constant integers. Does nothing when there is no insert point.
static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(false);
  }
}

/// Set up the loop attributes for a simd loop: the loop is marked parallel
/// unless \p IsMonotonic, vectorization is enabled, and the simdlen/safelen
/// clauses of \p D are applied.
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}

/// Emit the final-value expressions of the loop counters of \p D, followed by
/// the final values of its linear variables. A counter is processed only if
/// its declaration is a known local or is found in the captured-statement
/// info. Does nothing when there is no insert point.
void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return;
  auto IC = D.counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) {
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      // Map the counter to its original address while the final expression
      // is emitted.
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
  }
  emitLinearClauseFinal(*this, D);
}

/// Emit the OpenMP simd directive \p S as an inlined region: precondition
/// check, iteration variable and iteration count, privatization, the inner
/// loop, and the final updates.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    CGF.EmitOMPLinearClauseInit(S);
    bool HasLastprivateClause;
    {
      OMPPrivateScope LoopScope(CGF);
      emitPrivateLoopCounters(CGF, LoopScope, S.counters(),
                              S.private_counters());
      emitPrivateLinearVars(CGF, S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      // Emit final copy of the lastprivate variables at the end of loops.
      if (HasLastprivateClause) {
        CGF.EmitOMPLastprivateClauseFinal(S);
      }
      CGF.EmitOMPReductionClauseFinal(S);
    }
    CGF.EmitOMPSimdFinal(S);
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

/// Emit the outer (dispatch) loop of a worksharing loop.
///
/// For ordered loops and dynamic schedule kinds the runtime dispatch
/// interface is used (emitForDispatchInit / emitForNext); otherwise static
/// init is emitted, the bounds are advanced by the stride after every chunk,
/// and emitForStaticFinish is called after the loop. The inner loop is
/// emitted through EmitOMPInnerLoop.
void CodeGenFunction::EmitOMPForOuterLoop(
    OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind);

  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //    __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
    RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind,
                           IVSize, IVSigned, Ordered, UBVal, Chunk);
  } else {
    RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
                         IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk);
  }

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond());
  } else {
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
                                 IL, LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();
  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                   [&S, LoopExit](CodeGenFunction &CGF) {
                     CGF.EmitOMPLoopBody(S, LoopExit);
                     CGF.EmitStopPoint(&S);
                   },
                   [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
                     // For ordered loops, notify the runtime after each
                     // increment.
                     if (Ordered) {
                       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
                           CGF, Loc, IVSize, IVSigned);
                     }
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  if (!DynamicOrOrdered)
    RT.emitForStaticFinish(*this, S.getLocEnd());
}

/// \brief Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

namespace {
/// Bundles a schedule clause kind with its two schedule modifiers.
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace

bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
1552 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 1553 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 1554 EmitVarDecl(*IVDecl); 1555 1556 // Emit the iterations count variable. 1557 // If it is not a variable, Sema decided to calculate iterations count on each 1558 // iteration (e.g., it is foldable into a constant). 1559 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1560 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1561 // Emit calculation of the iterations count. 1562 EmitIgnoredExpr(S.getCalcLastIteration()); 1563 } 1564 1565 auto &RT = CGM.getOpenMPRuntime(); 1566 1567 bool HasLastprivateClause; 1568 // Check pre-condition. 1569 { 1570 // Skip the entire loop if we don't meet the precondition. 1571 // If the condition constant folds and can be elided, avoid emitting the 1572 // whole loop. 1573 bool CondConstant; 1574 llvm::BasicBlock *ContBlock = nullptr; 1575 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1576 if (!CondConstant) 1577 return false; 1578 } else { 1579 auto *ThenBlock = createBasicBlock("omp.precond.then"); 1580 ContBlock = createBasicBlock("omp.precond.end"); 1581 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 1582 getProfileCount(&S)); 1583 EmitBlock(ThenBlock); 1584 incrementProfileCounter(&S); 1585 } 1586 1587 emitAlignedClause(*this, S); 1588 EmitOMPLinearClauseInit(S); 1589 // Emit 'then' code. 1590 { 1591 // Emit helper vars inits. 
1592 LValue LB = 1593 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); 1594 LValue UB = 1595 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); 1596 LValue ST = 1597 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 1598 LValue IL = 1599 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 1600 1601 OMPPrivateScope LoopScope(*this); 1602 if (EmitOMPFirstprivateClause(S, LoopScope)) { 1603 // Emit implicit barrier to synchronize threads and avoid data races on 1604 // initialization of firstprivate variables and post-update of 1605 // lastprivate variables. 1606 CGM.getOpenMPRuntime().emitBarrierCall( 1607 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 1608 /*ForceSimpleCall=*/true); 1609 } 1610 EmitOMPPrivateClause(S, LoopScope); 1611 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 1612 EmitOMPReductionClauseInit(S, LoopScope); 1613 emitPrivateLoopCounters(*this, LoopScope, S.counters(), 1614 S.private_counters()); 1615 emitPrivateLinearVars(*this, S, LoopScope); 1616 (void)LoopScope.Privatize(); 1617 1618 // Detect the loop schedule kind and chunk. 
1619 llvm::Value *Chunk = nullptr; 1620 OpenMPScheduleClauseKind ScheduleKind = OMPC_SCHEDULE_unknown; 1621 OpenMPScheduleClauseModifier M1 = OMPC_SCHEDULE_MODIFIER_unknown; 1622 OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown; 1623 if (auto *C = S.getSingleClause<OMPScheduleClause>()) { 1624 ScheduleKind = C->getScheduleKind(); 1625 M1 = C->getFirstScheduleModifier(); 1626 M2 = C->getSecondScheduleModifier(); 1627 if (const auto *Ch = C->getChunkSize()) { 1628 Chunk = EmitScalarExpr(Ch); 1629 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 1630 S.getIterationVariable()->getType(), 1631 S.getLocStart()); 1632 } 1633 } 1634 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1635 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1636 const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr; 1637 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 1638 // If the static schedule kind is specified or if the ordered clause is 1639 // specified, and if no monotonic modifier is specified, the effect will 1640 // be as if the monotonic modifier was specified. 1641 if (RT.isStaticNonchunked(ScheduleKind, 1642 /* Chunked */ Chunk != nullptr) && 1643 !Ordered) { 1644 if (isOpenMPSimdDirective(S.getDirectiveKind())) 1645 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 1646 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1647 // When no chunk_size is specified, the iteration space is divided into 1648 // chunks that are approximately equal in size, and at most one chunk is 1649 // distributed to each thread. Note that the size of the chunks is 1650 // unspecified in this case. 
1651 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, 1652 IVSize, IVSigned, Ordered, 1653 IL.getAddress(), LB.getAddress(), 1654 UB.getAddress(), ST.getAddress()); 1655 auto LoopExit = 1656 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 1657 // UB = min(UB, GlobalUB); 1658 EmitIgnoredExpr(S.getEnsureUpperBound()); 1659 // IV = LB; 1660 EmitIgnoredExpr(S.getInit()); 1661 // while (idx <= UB) { BODY; ++idx; } 1662 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1663 S.getInc(), 1664 [&S, LoopExit](CodeGenFunction &CGF) { 1665 CGF.EmitOMPLoopBody(S, LoopExit); 1666 CGF.EmitStopPoint(&S); 1667 }, 1668 [](CodeGenFunction &) {}); 1669 EmitBlock(LoopExit.getBlock()); 1670 // Tell the runtime we are done. 1671 RT.emitForStaticFinish(*this, S.getLocStart()); 1672 } else { 1673 const bool IsMonotonic = Ordered || 1674 ScheduleKind == OMPC_SCHEDULE_static || 1675 ScheduleKind == OMPC_SCHEDULE_unknown || 1676 M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 1677 M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 1678 // Emit the outer loop, which requests its work chunk [LB..UB] from 1679 // runtime and runs the inner loop to process it. 1680 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 1681 LB.getAddress(), UB.getAddress(), ST.getAddress(), 1682 IL.getAddress(), Chunk); 1683 } 1684 EmitOMPReductionClauseFinal(S); 1685 // Emit final copy of the lastprivate variables if IsLastIter != 0. 1686 if (HasLastprivateClause) 1687 EmitOMPLastprivateClauseFinal( 1688 S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 1689 } 1690 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 1691 EmitOMPSimdFinal(S); 1692 } 1693 // We're now done with the loop, so jump to the continuation block. 
1694 if (ContBlock) { 1695 EmitBranch(ContBlock); 1696 EmitBlock(ContBlock, true); 1697 } 1698 } 1699 return HasLastprivateClause; 1700 } 1701 1702 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 1703 bool HasLastprivates = false; 1704 { 1705 OMPLexicalScope Scope(*this, S); 1706 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { 1707 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 1708 }; 1709 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 1710 S.hasCancel()); 1711 } 1712 1713 // Emit an implicit barrier at the end. 1714 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 1715 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 1716 } 1717 } 1718 1719 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 1720 bool HasLastprivates = false; 1721 { 1722 OMPLexicalScope Scope(*this, S); 1723 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { 1724 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 1725 }; 1726 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1727 } 1728 1729 // Emit an implicit barrier at the end. 
1730 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 1731 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 1732 } 1733 } 1734 1735 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 1736 const Twine &Name, 1737 llvm::Value *Init = nullptr) { 1738 auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 1739 if (Init) 1740 CGF.EmitScalarInit(Init, LVal); 1741 return LVal; 1742 } 1743 1744 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 1745 auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); 1746 auto *CS = dyn_cast<CompoundStmt>(Stmt); 1747 bool HasLastprivates = false; 1748 auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF) { 1749 auto &C = CGF.CGM.getContext(); 1750 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1751 // Emit helper vars inits. 1752 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 1753 CGF.Builder.getInt32(0)); 1754 auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1) 1755 : CGF.Builder.getInt32(0); 1756 LValue UB = 1757 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 1758 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 1759 CGF.Builder.getInt32(1)); 1760 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 1761 CGF.Builder.getInt32(0)); 1762 // Loop counter. 1763 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 1764 OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 1765 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 1766 OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 1767 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 1768 // Generate condition for loop. 
1769 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 1770 OK_Ordinary, S.getLocStart(), 1771 /*fpContractable=*/false); 1772 // Increment for loop counter. 1773 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 1774 S.getLocStart()); 1775 auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) { 1776 // Iterate through all sections and emit a switch construct: 1777 // switch (IV) { 1778 // case 0: 1779 // <SectionStmt[0]>; 1780 // break; 1781 // ... 1782 // case <NumSection> - 1: 1783 // <SectionStmt[<NumSection> - 1]>; 1784 // break; 1785 // } 1786 // .omp.sections.exit: 1787 auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 1788 auto *SwitchStmt = CGF.Builder.CreateSwitch( 1789 CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, 1790 CS == nullptr ? 1 : CS->size()); 1791 if (CS) { 1792 unsigned CaseNumber = 0; 1793 for (auto *SubStmt : CS->children()) { 1794 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 1795 CGF.EmitBlock(CaseBB); 1796 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 1797 CGF.EmitStmt(SubStmt); 1798 CGF.EmitBranch(ExitBB); 1799 ++CaseNumber; 1800 } 1801 } else { 1802 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 1803 CGF.EmitBlock(CaseBB); 1804 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 1805 CGF.EmitStmt(Stmt); 1806 CGF.EmitBranch(ExitBB); 1807 } 1808 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 1809 }; 1810 1811 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 1812 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 1813 // Emit implicit barrier to synchronize threads and avoid data races on 1814 // initialization of firstprivate variables and post-update of lastprivate 1815 // variables. 
1816 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 1817 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 1818 /*ForceSimpleCall=*/true); 1819 } 1820 CGF.EmitOMPPrivateClause(S, LoopScope); 1821 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1822 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1823 (void)LoopScope.Privatize(); 1824 1825 // Emit static non-chunked loop. 1826 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 1827 CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32, 1828 /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(), 1829 UB.getAddress(), ST.getAddress()); 1830 // UB = min(UB, GlobalUB); 1831 auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); 1832 auto *MinUBGlobalUB = CGF.Builder.CreateSelect( 1833 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 1834 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 1835 // IV = LB; 1836 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); 1837 // while (idx <= UB) { BODY; ++idx; } 1838 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, 1839 [](CodeGenFunction &) {}); 1840 // Tell the runtime we are done. 1841 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart()); 1842 CGF.EmitOMPReductionClauseFinal(S); 1843 1844 // Emit final copy of the lastprivate variables if IsLastIter != 0. 1845 if (HasLastprivates) 1846 CGF.EmitOMPLastprivateClauseFinal( 1847 S, CGF.Builder.CreateIsNotNull( 1848 CGF.EmitLoadOfScalar(IL, S.getLocStart()))); 1849 }; 1850 1851 bool HasCancel = false; 1852 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 1853 HasCancel = OSD->hasCancel(); 1854 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 1855 HasCancel = OPSD->hasCancel(); 1856 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 1857 HasCancel); 1858 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 1859 // clause. 
Otherwise the barrier will be generated by the codegen for the 1860 // directive. 1861 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 1862 // Emit implicit barrier to synchronize threads and avoid data races on 1863 // initialization of firstprivate variables. 1864 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 1865 OMPD_unknown); 1866 } 1867 } 1868 1869 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 1870 { 1871 OMPLexicalScope Scope(*this, S); 1872 EmitSections(S); 1873 } 1874 // Emit an implicit barrier at the end. 1875 if (!S.getSingleClause<OMPNowaitClause>()) { 1876 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 1877 OMPD_sections); 1878 } 1879 } 1880 1881 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 1882 OMPLexicalScope Scope(*this, S); 1883 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1884 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1885 }; 1886 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 1887 S.hasCancel()); 1888 } 1889 1890 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 1891 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 1892 llvm::SmallVector<const Expr *, 8> DestExprs; 1893 llvm::SmallVector<const Expr *, 8> SrcExprs; 1894 llvm::SmallVector<const Expr *, 8> AssignmentOps; 1895 // Check if there are any 'copyprivate' clauses associated with this 1896 // 'single' construct. 
1897 // Build a list of copyprivate variables along with helper expressions 1898 // (<source>, <destination>, <destination>=<source> expressions) 1899 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 1900 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 1901 DestExprs.append(C->destination_exprs().begin(), 1902 C->destination_exprs().end()); 1903 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 1904 AssignmentOps.append(C->assignment_ops().begin(), 1905 C->assignment_ops().end()); 1906 } 1907 { 1908 OMPLexicalScope Scope(*this, S); 1909 // Emit code for 'single' region along with 'copyprivate' clauses 1910 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1911 CodeGenFunction::OMPPrivateScope SingleScope(CGF); 1912 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 1913 CGF.EmitOMPPrivateClause(S, SingleScope); 1914 (void)SingleScope.Privatize(); 1915 CGF.EmitStmt( 1916 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1917 }; 1918 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 1919 CopyprivateVars, DestExprs, 1920 SrcExprs, AssignmentOps); 1921 } 1922 // Emit an implicit barrier at the end (to avoid data race on firstprivate 1923 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 1924 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 1925 CGM.getOpenMPRuntime().emitBarrierCall( 1926 *this, S.getLocStart(), 1927 S.getSingleClause<OMPNowaitClause>() ? 
OMPD_unknown : OMPD_single); 1928 } 1929 } 1930 1931 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 1932 OMPLexicalScope Scope(*this, S); 1933 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1934 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1935 }; 1936 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 1937 } 1938 1939 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 1940 OMPLexicalScope Scope(*this, S); 1941 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1942 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1943 }; 1944 Expr *Hint = nullptr; 1945 if (auto *HintClause = S.getSingleClause<OMPHintClause>()) 1946 Hint = HintClause->getHint(); 1947 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 1948 S.getDirectiveName().getAsString(), 1949 CodeGen, S.getLocStart(), Hint); 1950 } 1951 1952 void CodeGenFunction::EmitOMPParallelForDirective( 1953 const OMPParallelForDirective &S) { 1954 // Emit directive as a combined directive that consists of two implicit 1955 // directives: 'parallel' with 'for' directive. 1956 OMPLexicalScope Scope(*this, S); 1957 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1958 CGF.EmitOMPWorksharingLoop(S); 1959 }; 1960 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); 1961 } 1962 1963 void CodeGenFunction::EmitOMPParallelForSimdDirective( 1964 const OMPParallelForSimdDirective &S) { 1965 // Emit directive as a combined directive that consists of two implicit 1966 // directives: 'parallel' with 'for' directive. 
1967 OMPLexicalScope Scope(*this, S); 1968 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1969 CGF.EmitOMPWorksharingLoop(S); 1970 }; 1971 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); 1972 } 1973 1974 void CodeGenFunction::EmitOMPParallelSectionsDirective( 1975 const OMPParallelSectionsDirective &S) { 1976 // Emit directive as a combined directive that consists of two implicit 1977 // directives: 'parallel' with 'sections' directive. 1978 OMPLexicalScope Scope(*this, S); 1979 auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitSections(S); }; 1980 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); 1981 } 1982 1983 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 1984 // Emit outlined function for task construct. 1985 OMPLexicalScope Scope(*this, S); 1986 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 1987 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 1988 auto *I = CS->getCapturedDecl()->param_begin(); 1989 auto *PartId = std::next(I); 1990 // The first function argument for tasks is a thread id, the second one is a 1991 // part id (0 for tied tasks, >=0 for untied task). 1992 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 1993 // Get list of private variables. 1994 llvm::SmallVector<const Expr *, 8> PrivateVars; 1995 llvm::SmallVector<const Expr *, 8> PrivateCopies; 1996 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 1997 auto IRef = C->varlist_begin(); 1998 for (auto *IInit : C->private_copies()) { 1999 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2000 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2001 PrivateVars.push_back(*IRef); 2002 PrivateCopies.push_back(IInit); 2003 } 2004 ++IRef; 2005 } 2006 } 2007 EmittedAsPrivate.clear(); 2008 // Get list of firstprivate variables. 
2009 llvm::SmallVector<const Expr *, 8> FirstprivateVars; 2010 llvm::SmallVector<const Expr *, 8> FirstprivateCopies; 2011 llvm::SmallVector<const Expr *, 8> FirstprivateInits; 2012 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 2013 auto IRef = C->varlist_begin(); 2014 auto IElemInitRef = C->inits().begin(); 2015 for (auto *IInit : C->private_copies()) { 2016 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2017 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2018 FirstprivateVars.push_back(*IRef); 2019 FirstprivateCopies.push_back(IInit); 2020 FirstprivateInits.push_back(*IElemInitRef); 2021 } 2022 ++IRef; 2023 ++IElemInitRef; 2024 } 2025 } 2026 // Build list of dependences. 2027 llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8> 2028 Dependences; 2029 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { 2030 for (auto *IRef : C->varlists()) { 2031 Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); 2032 } 2033 } 2034 auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars]( 2035 CodeGenFunction &CGF) { 2036 // Set proper addresses for generated private copies. 2037 auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2038 OMPPrivateScope Scope(CGF); 2039 if (!PrivateVars.empty() || !FirstprivateVars.empty()) { 2040 auto *CopyFn = CGF.Builder.CreateLoad( 2041 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); 2042 auto *PrivatesPtr = CGF.Builder.CreateLoad( 2043 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); 2044 // Map privates. 
2045 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> 2046 PrivatePtrs; 2047 llvm::SmallVector<llvm::Value *, 16> CallArgs; 2048 CallArgs.push_back(PrivatesPtr); 2049 for (auto *E : PrivateVars) { 2050 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2051 Address PrivatePtr = 2052 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); 2053 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2054 CallArgs.push_back(PrivatePtr.getPointer()); 2055 } 2056 for (auto *E : FirstprivateVars) { 2057 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2058 Address PrivatePtr = 2059 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); 2060 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2061 CallArgs.push_back(PrivatePtr.getPointer()); 2062 } 2063 CGF.EmitRuntimeCall(CopyFn, CallArgs); 2064 for (auto &&Pair : PrivatePtrs) { 2065 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 2066 CGF.getContext().getDeclAlign(Pair.first)); 2067 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 2068 } 2069 } 2070 (void)Scope.Privatize(); 2071 if (*PartId) { 2072 // TODO: emit code for untied tasks. 2073 } 2074 CGF.EmitStmt(CS->getCapturedStmt()); 2075 }; 2076 auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 2077 S, *I, OMPD_task, CodeGen); 2078 // Check if we should emit tied or untied task. 2079 bool Tied = !S.getSingleClause<OMPUntiedClause>(); 2080 // Check if the task is final 2081 llvm::PointerIntPair<llvm::Value *, 1, bool> Final; 2082 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 2083 // If the condition constant folds and can be elided, try to avoid emitting 2084 // the condition and the dead arm of the if/else. 
2085 auto *Cond = Clause->getCondition(); 2086 bool CondConstant; 2087 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 2088 Final.setInt(CondConstant); 2089 else 2090 Final.setPointer(EvaluateExprAsBool(Cond)); 2091 } else { 2092 // By default the task is not final. 2093 Final.setInt(/*IntVal=*/false); 2094 } 2095 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 2096 const Expr *IfCond = nullptr; 2097 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2098 if (C->getNameModifier() == OMPD_unknown || 2099 C->getNameModifier() == OMPD_task) { 2100 IfCond = C->getCondition(); 2101 break; 2102 } 2103 } 2104 CGM.getOpenMPRuntime().emitTaskCall( 2105 *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy, 2106 CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars, 2107 FirstprivateCopies, FirstprivateInits, Dependences); 2108 } 2109 2110 void CodeGenFunction::EmitOMPTaskyieldDirective( 2111 const OMPTaskyieldDirective &S) { 2112 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); 2113 } 2114 2115 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 2116 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); 2117 } 2118 2119 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 2120 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart()); 2121 } 2122 2123 void CodeGenFunction::EmitOMPTaskgroupDirective( 2124 const OMPTaskgroupDirective &S) { 2125 OMPLexicalScope Scope(*this, S); 2126 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2127 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2128 }; 2129 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); 2130 } 2131 2132 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 2133 CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { 2134 if (const auto *FlushClause = 
S.getSingleClause<OMPFlushClause>()) { 2135 return llvm::makeArrayRef(FlushClause->varlist_begin(), 2136 FlushClause->varlist_end()); 2137 } 2138 return llvm::None; 2139 }(), S.getLocStart()); 2140 } 2141 2142 void CodeGenFunction::EmitOMPDistributeDirective( 2143 const OMPDistributeDirective &S) { 2144 llvm_unreachable("CodeGen for 'omp distribute' is not supported yet."); 2145 } 2146 2147 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 2148 const CapturedStmt *S) { 2149 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 2150 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 2151 CGF.CapturedStmtInfo = &CapStmtInfo; 2152 auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); 2153 Fn->addFnAttr(llvm::Attribute::NoInline); 2154 return Fn; 2155 } 2156 2157 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 2158 if (!S.getAssociatedStmt()) 2159 return; 2160 OMPLexicalScope Scope(*this, S); 2161 auto *C = S.getSingleClause<OMPSIMDClause>(); 2162 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) { 2163 if (C) { 2164 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2165 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 2166 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 2167 auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); 2168 CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); 2169 } else { 2170 CGF.EmitStmt( 2171 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2172 } 2173 }; 2174 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); 2175 } 2176 2177 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 2178 QualType SrcType, QualType DestType, 2179 SourceLocation Loc) { 2180 assert(CGF.hasScalarEvaluationKind(DestType) && 2181 "DestType must have scalar evaluation kind."); 2182 assert(!Val.isAggregate() && "Must be a scalar or complex."); 2183 return Val.isScalar() 2184 ? 
CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, 2185 Loc) 2186 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 2187 DestType, Loc); 2188 } 2189 2190 static CodeGenFunction::ComplexPairTy 2191 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 2192 QualType DestType, SourceLocation Loc) { 2193 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 2194 "DestType must have complex evaluation kind."); 2195 CodeGenFunction::ComplexPairTy ComplexVal; 2196 if (Val.isScalar()) { 2197 // Convert the input element to the element type of the complex. 2198 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 2199 auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 2200 DestElementType, Loc); 2201 ComplexVal = CodeGenFunction::ComplexPairTy( 2202 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 2203 } else { 2204 assert(Val.isComplex() && "Must be a scalar or complex."); 2205 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 2206 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 2207 ComplexVal.first = CGF.EmitScalarConversion( 2208 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 2209 ComplexVal.second = CGF.EmitScalarConversion( 2210 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 2211 } 2212 return ComplexVal; 2213 } 2214 2215 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 2216 LValue LVal, RValue RVal) { 2217 if (LVal.isGlobalReg()) { 2218 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 2219 } else { 2220 CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? 
llvm::SequentiallyConsistent 2221 : llvm::Monotonic, 2222 LVal.isVolatile(), /*IsInit=*/false); 2223 } 2224 } 2225 2226 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, 2227 QualType RValTy, SourceLocation Loc) { 2228 switch (getEvaluationKind(LVal.getType())) { 2229 case TEK_Scalar: 2230 EmitStoreThroughLValue(RValue::get(convertToScalarValue( 2231 *this, RVal, RValTy, LVal.getType(), Loc)), 2232 LVal); 2233 break; 2234 case TEK_Complex: 2235 EmitStoreOfComplex( 2236 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, 2237 /*isInit=*/false); 2238 break; 2239 case TEK_Aggregate: 2240 llvm_unreachable("Must be a scalar or complex."); 2241 } 2242 } 2243 2244 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 2245 const Expr *X, const Expr *V, 2246 SourceLocation Loc) { 2247 // v = x; 2248 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 2249 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 2250 LValue XLValue = CGF.EmitLValue(X); 2251 LValue VLValue = CGF.EmitLValue(V); 2252 RValue Res = XLValue.isGlobalReg() 2253 ? CGF.EmitLoadOfLValue(XLValue, Loc) 2254 : CGF.EmitAtomicLoad(XLValue, Loc, 2255 IsSeqCst ? llvm::SequentiallyConsistent 2256 : llvm::Monotonic, 2257 XLValue.isVolatile()); 2258 // OpenMP, 2.12.6, atomic Construct 2259 // Any atomic construct with a seq_cst clause forces the atomically 2260 // performed operation to include an implicit flush operation without a 2261 // list. 
2262 if (IsSeqCst) 2263 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2264 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); 2265 } 2266 2267 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 2268 const Expr *X, const Expr *E, 2269 SourceLocation Loc) { 2270 // x = expr; 2271 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 2272 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); 2273 // OpenMP, 2.12.6, atomic Construct 2274 // Any atomic construct with a seq_cst clause forces the atomically 2275 // performed operation to include an implicit flush operation without a 2276 // list. 2277 if (IsSeqCst) 2278 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2279 } 2280 2281 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, 2282 RValue Update, 2283 BinaryOperatorKind BO, 2284 llvm::AtomicOrdering AO, 2285 bool IsXLHSInRHSPart) { 2286 auto &Context = CGF.CGM.getContext(); 2287 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' 2288 // expression is simple and atomic is allowed for the given type for the 2289 // target platform. 
2290 if (BO == BO_Comma || !Update.isScalar() || 2291 !Update.getScalarVal()->getType()->isIntegerTy() || 2292 !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) && 2293 (Update.getScalarVal()->getType() != 2294 X.getAddress().getElementType())) || 2295 !X.getAddress().getElementType()->isIntegerTy() || 2296 !Context.getTargetInfo().hasBuiltinAtomic( 2297 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) 2298 return std::make_pair(false, RValue::get(nullptr)); 2299 2300 llvm::AtomicRMWInst::BinOp RMWOp; 2301 switch (BO) { 2302 case BO_Add: 2303 RMWOp = llvm::AtomicRMWInst::Add; 2304 break; 2305 case BO_Sub: 2306 if (!IsXLHSInRHSPart) 2307 return std::make_pair(false, RValue::get(nullptr)); 2308 RMWOp = llvm::AtomicRMWInst::Sub; 2309 break; 2310 case BO_And: 2311 RMWOp = llvm::AtomicRMWInst::And; 2312 break; 2313 case BO_Or: 2314 RMWOp = llvm::AtomicRMWInst::Or; 2315 break; 2316 case BO_Xor: 2317 RMWOp = llvm::AtomicRMWInst::Xor; 2318 break; 2319 case BO_LT: 2320 RMWOp = X.getType()->hasSignedIntegerRepresentation() 2321 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min 2322 : llvm::AtomicRMWInst::Max) 2323 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin 2324 : llvm::AtomicRMWInst::UMax); 2325 break; 2326 case BO_GT: 2327 RMWOp = X.getType()->hasSignedIntegerRepresentation() 2328 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max 2329 : llvm::AtomicRMWInst::Min) 2330 : (IsXLHSInRHSPart ? 
llvm::AtomicRMWInst::UMax 2331 : llvm::AtomicRMWInst::UMin); 2332 break; 2333 case BO_Assign: 2334 RMWOp = llvm::AtomicRMWInst::Xchg; 2335 break; 2336 case BO_Mul: 2337 case BO_Div: 2338 case BO_Rem: 2339 case BO_Shl: 2340 case BO_Shr: 2341 case BO_LAnd: 2342 case BO_LOr: 2343 return std::make_pair(false, RValue::get(nullptr)); 2344 case BO_PtrMemD: 2345 case BO_PtrMemI: 2346 case BO_LE: 2347 case BO_GE: 2348 case BO_EQ: 2349 case BO_NE: 2350 case BO_AddAssign: 2351 case BO_SubAssign: 2352 case BO_AndAssign: 2353 case BO_OrAssign: 2354 case BO_XorAssign: 2355 case BO_MulAssign: 2356 case BO_DivAssign: 2357 case BO_RemAssign: 2358 case BO_ShlAssign: 2359 case BO_ShrAssign: 2360 case BO_Comma: 2361 llvm_unreachable("Unsupported atomic update operation"); 2362 } 2363 auto *UpdateVal = Update.getScalarVal(); 2364 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) { 2365 UpdateVal = CGF.Builder.CreateIntCast( 2366 IC, X.getAddress().getElementType(), 2367 X.getType()->hasSignedIntegerRepresentation()); 2368 } 2369 auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO); 2370 return std::make_pair(true, RValue::get(Res)); 2371 } 2372 2373 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( 2374 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, 2375 llvm::AtomicOrdering AO, SourceLocation Loc, 2376 const llvm::function_ref<RValue(RValue)> &CommonGen) { 2377 // Update expressions are allowed to have the following forms: 2378 // x binop= expr; -> xrval + expr; 2379 // x++, ++x -> xrval + 1; 2380 // x--, --x -> xrval - 1; 2381 // x = x binop expr; -> xrval binop expr 2382 // x = expr Op x; - > expr binop xrval; 2383 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart); 2384 if (!Res.first) { 2385 if (X.isGlobalReg()) { 2386 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop 2387 // 'xrval'. 
2388 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X); 2389 } else { 2390 // Perform compare-and-swap procedure. 2391 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified()); 2392 } 2393 } 2394 return Res; 2395 } 2396 2397 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, 2398 const Expr *X, const Expr *E, 2399 const Expr *UE, bool IsXLHSInRHSPart, 2400 SourceLocation Loc) { 2401 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 2402 "Update expr in 'atomic update' must be a binary operator."); 2403 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 2404 // Update expressions are allowed to have the following forms: 2405 // x binop= expr; -> xrval + expr; 2406 // x++, ++x -> xrval + 1; 2407 // x--, --x -> xrval - 1; 2408 // x = x binop expr; -> xrval binop expr 2409 // x = expr Op x; - > expr binop xrval; 2410 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); 2411 LValue XLValue = CGF.EmitLValue(X); 2412 RValue ExprRValue = CGF.EmitAnyExpr(E); 2413 auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic; 2414 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 2415 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 2416 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 2417 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 2418 auto Gen = 2419 [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue { 2420 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 2421 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 2422 return CGF.EmitAnyExpr(UE); 2423 }; 2424 (void)CGF.EmitOMPAtomicSimpleUpdateExpr( 2425 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 2426 // OpenMP, 2.12.6, atomic Construct 2427 // Any atomic construct with a seq_cst clause forces the atomically 2428 // performed operation to include an implicit flush operation without a 2429 // list. 
2430 if (IsSeqCst) 2431 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2432 } 2433 2434 static RValue convertToType(CodeGenFunction &CGF, RValue Value, 2435 QualType SourceType, QualType ResType, 2436 SourceLocation Loc) { 2437 switch (CGF.getEvaluationKind(ResType)) { 2438 case TEK_Scalar: 2439 return RValue::get( 2440 convertToScalarValue(CGF, Value, SourceType, ResType, Loc)); 2441 case TEK_Complex: { 2442 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc); 2443 return RValue::getComplex(Res.first, Res.second); 2444 } 2445 case TEK_Aggregate: 2446 break; 2447 } 2448 llvm_unreachable("Must be a scalar or complex."); 2449 } 2450 2451 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, 2452 bool IsPostfixUpdate, const Expr *V, 2453 const Expr *X, const Expr *E, 2454 const Expr *UE, bool IsXLHSInRHSPart, 2455 SourceLocation Loc) { 2456 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue"); 2457 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue"); 2458 RValue NewVVal; 2459 LValue VLValue = CGF.EmitLValue(V); 2460 LValue XLValue = CGF.EmitLValue(X); 2461 RValue ExprRValue = CGF.EmitAnyExpr(E); 2462 auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic; 2463 QualType NewVValType; 2464 if (UE) { 2465 // 'x' is updated with some additional value. 2466 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 2467 "Update expr in 'atomic capture' must be a binary operator."); 2468 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 2469 // Update expressions are allowed to have the following forms: 2470 // x binop= expr; -> xrval + expr; 2471 // x++, ++x -> xrval + 1; 2472 // x--, --x -> xrval - 1; 2473 // x = x binop expr; -> xrval binop expr 2474 // x = expr Op x; - > expr binop xrval; 2475 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 2476 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 2477 auto *XRValExpr = IsXLHSInRHSPart ? 
LHS : RHS; 2478 NewVValType = XRValExpr->getType(); 2479 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 2480 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, 2481 IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue { 2482 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 2483 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 2484 RValue Res = CGF.EmitAnyExpr(UE); 2485 NewVVal = IsPostfixUpdate ? XRValue : Res; 2486 return Res; 2487 }; 2488 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 2489 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 2490 if (Res.first) { 2491 // 'atomicrmw' instruction was generated. 2492 if (IsPostfixUpdate) { 2493 // Use old value from 'atomicrmw'. 2494 NewVVal = Res.second; 2495 } else { 2496 // 'atomicrmw' does not provide new value, so evaluate it using old 2497 // value of 'x'. 2498 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 2499 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 2500 NewVVal = CGF.EmitAnyExpr(UE); 2501 } 2502 } 2503 } else { 2504 // 'x' is simply rewritten with some 'expr'. 2505 NewVValType = X->getType().getNonReferenceType(); 2506 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 2507 X->getType().getNonReferenceType(), Loc); 2508 auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue { 2509 NewVVal = XRValue; 2510 return ExprRValue; 2511 }; 2512 // Try to perform atomicrmw xchg, otherwise simple exchange. 2513 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 2514 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 2515 Loc, Gen); 2516 if (Res.first) { 2517 // 'atomicrmw' instruction was generated. 2518 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 2519 } 2520 } 2521 // Emit post-update store to 'v' of old/new 'x' value. 
2522 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); 2523 // OpenMP, 2.12.6, atomic Construct 2524 // Any atomic construct with a seq_cst clause forces the atomically 2525 // performed operation to include an implicit flush operation without a 2526 // list. 2527 if (IsSeqCst) 2528 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2529 } 2530 2531 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 2532 bool IsSeqCst, bool IsPostfixUpdate, 2533 const Expr *X, const Expr *V, const Expr *E, 2534 const Expr *UE, bool IsXLHSInRHSPart, 2535 SourceLocation Loc) { 2536 switch (Kind) { 2537 case OMPC_read: 2538 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 2539 break; 2540 case OMPC_write: 2541 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 2542 break; 2543 case OMPC_unknown: 2544 case OMPC_update: 2545 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 2546 break; 2547 case OMPC_capture: 2548 EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 2549 IsXLHSInRHSPart, Loc); 2550 break; 2551 case OMPC_if: 2552 case OMPC_final: 2553 case OMPC_num_threads: 2554 case OMPC_private: 2555 case OMPC_firstprivate: 2556 case OMPC_lastprivate: 2557 case OMPC_reduction: 2558 case OMPC_safelen: 2559 case OMPC_simdlen: 2560 case OMPC_collapse: 2561 case OMPC_default: 2562 case OMPC_seq_cst: 2563 case OMPC_shared: 2564 case OMPC_linear: 2565 case OMPC_aligned: 2566 case OMPC_copyin: 2567 case OMPC_copyprivate: 2568 case OMPC_flush: 2569 case OMPC_proc_bind: 2570 case OMPC_schedule: 2571 case OMPC_ordered: 2572 case OMPC_nowait: 2573 case OMPC_untied: 2574 case OMPC_threadprivate: 2575 case OMPC_depend: 2576 case OMPC_mergeable: 2577 case OMPC_device: 2578 case OMPC_threads: 2579 case OMPC_simd: 2580 case OMPC_map: 2581 case OMPC_num_teams: 2582 case OMPC_thread_limit: 2583 case OMPC_priority: 2584 case OMPC_grainsize: 2585 case OMPC_nogroup: 2586 case OMPC_num_tasks: 2587 case OMPC_hint: 2588 case 
OMPC_dist_schedule: 2589 case OMPC_defaultmap: 2590 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 2591 } 2592 } 2593 2594 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 2595 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); 2596 OpenMPClauseKind Kind = OMPC_unknown; 2597 for (auto *C : S.clauses()) { 2598 // Find first clause (skip seq_cst clause, if it is first). 2599 if (C->getClauseKind() != OMPC_seq_cst) { 2600 Kind = C->getClauseKind(); 2601 break; 2602 } 2603 } 2604 2605 const auto *CS = 2606 S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 2607 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) { 2608 enterFullExpression(EWC); 2609 } 2610 // Processing for statements under 'atomic capture'. 2611 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 2612 for (const auto *C : Compound->body()) { 2613 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) { 2614 enterFullExpression(EWC); 2615 } 2616 } 2617 } 2618 2619 OMPLexicalScope Scope(*this, S); 2620 auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF) { 2621 CGF.EmitStopPoint(CS); 2622 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), 2623 S.getV(), S.getExpr(), S.getUpdateExpr(), 2624 S.isXLHSInRHSPart(), S.getLocStart()); 2625 }; 2626 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); 2627 } 2628 2629 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 2630 OMPLexicalScope Scope(*this, S); 2631 const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); 2632 2633 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 2634 GenerateOpenMPCapturedVars(CS, CapturedVars); 2635 2636 llvm::Function *Fn = nullptr; 2637 llvm::Constant *FnID = nullptr; 2638 2639 // Check if we have any if clause associated with the directive. 
2640 const Expr *IfCond = nullptr; 2641 2642 if (auto *C = S.getSingleClause<OMPIfClause>()) { 2643 IfCond = C->getCondition(); 2644 } 2645 2646 // Check if we have any device clause associated with the directive. 2647 const Expr *Device = nullptr; 2648 if (auto *C = S.getSingleClause<OMPDeviceClause>()) { 2649 Device = C->getDevice(); 2650 } 2651 2652 // Check if we have an if clause whose conditional always evaluates to false 2653 // or if we do not have any targets specified. If so the target region is not 2654 // an offload entry point. 2655 bool IsOffloadEntry = true; 2656 if (IfCond) { 2657 bool Val; 2658 if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) 2659 IsOffloadEntry = false; 2660 } 2661 if (CGM.getLangOpts().OMPTargetTriples.empty()) 2662 IsOffloadEntry = false; 2663 2664 assert(CurFuncDecl && "No parent declaration for target region!"); 2665 StringRef ParentName; 2666 // In case we have Ctors/Dtors we use the complete type variant to produce 2667 // the mangling of the device outlined kernel. 
2668 if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl)) 2669 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); 2670 else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl)) 2671 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); 2672 else 2673 ParentName = 2674 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl))); 2675 2676 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, 2677 IsOffloadEntry); 2678 2679 CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device, 2680 CapturedVars); 2681 } 2682 2683 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) { 2684 llvm_unreachable("CodeGen for 'omp teams' is not supported yet."); 2685 } 2686 2687 void CodeGenFunction::EmitOMPCancellationPointDirective( 2688 const OMPCancellationPointDirective &S) { 2689 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), 2690 S.getCancelRegion()); 2691 } 2692 2693 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 2694 const Expr *IfCond = nullptr; 2695 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2696 if (C->getNameModifier() == OMPD_unknown || 2697 C->getNameModifier() == OMPD_cancel) { 2698 IfCond = C->getCondition(); 2699 break; 2700 } 2701 } 2702 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, 2703 S.getCancelRegion()); 2704 } 2705 2706 CodeGenFunction::JumpDest 2707 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 2708 if (Kind == OMPD_parallel || Kind == OMPD_task) 2709 return ReturnBlock; 2710 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 2711 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for); 2712 return BreakContinueStack.back().BreakBlock; 2713 } 2714 2715 // Generate the instructions for '#pragma omp target data' directive. 
2716 void CodeGenFunction::EmitOMPTargetDataDirective( 2717 const OMPTargetDataDirective &S) { 2718 // emit the code inside the construct for now 2719 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2720 CGM.getOpenMPRuntime().emitInlinedDirective( 2721 *this, OMPD_target_data, 2722 [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); 2723 } 2724 2725 void CodeGenFunction::EmitOMPTargetEnterDataDirective( 2726 const OMPTargetEnterDataDirective &S) { 2727 // TODO: codegen for target enter data. 2728 } 2729 2730 void CodeGenFunction::EmitOMPTargetExitDataDirective( 2731 const OMPTargetExitDataDirective &S) { 2732 // TODO: codegen for target exit data. 2733 } 2734 2735 void CodeGenFunction::EmitOMPTargetParallelDirective( 2736 const OMPTargetParallelDirective &S) { 2737 // TODO: codegen for target parallel. 2738 } 2739 2740 void CodeGenFunction::EmitOMPTargetParallelForDirective( 2741 const OMPTargetParallelForDirective &S) { 2742 // TODO: codegen for target parallel for. 2743 } 2744 2745 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 2746 // emit the code inside the construct for now 2747 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2748 CGM.getOpenMPRuntime().emitInlinedDirective( 2749 *this, OMPD_taskloop, 2750 [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); 2751 } 2752 2753 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 2754 const OMPTaskLoopSimdDirective &S) { 2755 // emit the code inside the construct for now 2756 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2757 CGM.getOpenMPRuntime().emitInlinedDirective( 2758 *this, OMPD_taskloop_simd, 2759 [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); 2760 } 2761 2762