//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
          return CGF.EmitLValue(&DRE).getAddress(CGF);
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of expressions used in the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    CodeGenFunction::OMPMapVars PreCondVars;
    llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
    for (const auto *E : S.counters()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      EmittedAsPrivate.insert(VD->getCanonicalDecl());
      (void)PreCondVars.setVarAddr(
          CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
    }
    // Mark private vars as undefs.
    for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
      for (const Expr *IRef : C->varlists()) {
        const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
          (void)PreCondVars.setVarAddr(
              CGF, OrigVD,
              Address(llvm::UndefValue::get(
                          CGF.ConvertTypeForMem(CGF.getContext().getPointerType(
                              OrigVD->getType().getNonReferenceType()))),
                      CGF.getContext().getDeclAlign(OrigVD)));
        }
      }
    }
    (void)PreCondVars.apply(CGF);
    // Emit init, __range and __end variables for C++ range loops.
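    // For example (an illustrative input), given
    //   #pragma omp for collapse(2)
    //   for (int I = 0; I < N; ++I)
    //     for (int X : Vec) { ... }
    // the inner loop's implicit __range and __end variables must be emitted
    // here, before the loop precondition can be evaluated.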
    const Stmt *Body =
        S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
    for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) {
      Body = OMPLoopDirective::tryToFindNextInnerLoop(
          Body, /*TryImperfectlyNestedLoops=*/true);
      if (auto *For = dyn_cast<ForStmt>(Body)) {
        Body = For->getBody();
      } else {
        assert(isa<CXXForRangeStmt>(Body) &&
               "Expected canonical for loop or range-based for loop.");
        auto *CXXFor = cast<CXXForRangeStmt>(Body);
        if (const Stmt *Init = CXXFor->getInit())
          CGF.EmitStmt(Init);
        CGF.EmitStmt(CXXFor->getRangeStmt());
        CGF.EmitStmt(CXXFor->getEndStmt());
        Body = CXXFor->getBody();
      }
    }
    if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress(CGF);
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

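// As an illustration of the pre-init handling above: for a directive such as
//   #pragma omp parallel num_threads(compute())
// Sema stores the evaluated clause argument in a pre-init declaration, and
// emitPreInitStmt materializes such declarations so the region body sees a
// single, already-captured value.
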
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
                  : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress(CGF);
  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only the cast arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
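  // The resulting parameter list is laid out as (an illustrative sketch, not
  // a normative signature):
  //   <captured-decl params before the context param>,
  //   <one param per capture: variable, uintptr-cast copy, 'this', or a VLA
  //    size>,
  //   <captured-decl params after the context param>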
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary because the runtime library only deals with
    // pointers. VLA type sizes are passed to the outlined function in the same
    // way.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit the function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit the function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything;
    // just use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress(CGF);
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress(CGF)}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
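  // When debug info is needed (see NeedWrapperFunction below), the body is
  // emitted into a "<helper>_debug__" function that keeps the original
  // parameter types, and a thin "<helper>" wrapper with uintptr-cast
  // parameters is emitted that forwards to it.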
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
        return LocalAddrPair.second.second;
      });
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
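  // Conceptually (an illustrative sketch), the loop emitted below performs
  //   for (size_t i = 0; i != NumElements; ++i)
  //     CopyGen(&dest[i], &src[i]);
  // using a pair of pointer PHIs instead of an index variable.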
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI =
      Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element-by-element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // We are working with a single array element, so we have to remap
            // the destination and source variables to the corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
            Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function, e.g. omp for, omp simd, omp distribute, etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit the copy for firstprivate constant variables in target
      // regions that are captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this,
                                                                    OrigVD);
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
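          // For example (illustrative), a variable such as 'const int N = 8;'
          // named in firstprivate(N) folds to a constant, so no private copy
          // is needed.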
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
                const Expr *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform simple memcpy.
                  LValue Dest =
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
                } else {
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(),
                      OriginalLVal.getAddress(*this), Type,
                      [this, VDInit, Init](Address DestElement,
                                           Address SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for single element.
                        setAddrOfLocalVar(VDInit, SrcElement);
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer*/ false);
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
                                               ThisFirstprivateIsLastprivate,
                                               OrigVD, &Lastprivates, IRef]() {
                // Emit private VarDecl with copy init.
                // Remap temp VDInit variable to the address of the original
                // variable (for proper handling of captured global variables).
                setAddrOfLocalVar(VDInit, OriginalAddr);
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                if (ThisFirstprivateIsLastprivate &&
                    Lastprivates[OrigVD->getCanonicalDecl()] ==
                        OMPC_LASTPRIVATE_conditional) {
                  // Create/init special variable for lastprivate conditionals.
                  Address VDAddr =
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                          *this, OrigVD);
                  llvm::Value *V = EmitLoadOfScalar(
                      MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
                                     AlignmentSource::Decl),
                      (*IRef)->getExprLoc());
                  EmitStoreOfScalar(V,
                                    MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                   AlignmentSource::Decl));
                  LocalDeclMap.erase(VD);
                  setAddrOfLocalVar(VD, VDAddr);
                  return VDAddr;
                }
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If it
          // is, there is no need to copy the data.
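          // A sketch of the emitted check (illustrative pseudo-IR):
          //   %neq = icmp ne iptr <master addr>, <private addr>
          //   br i1 %neq, label %copyin.not.master,
          //               label %copyin.not.master.end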
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of the copying procedure for the non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                          /*RefersToEnclosingVariableOrCapture=*/
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress(*this);
        });
        // Check if the variable is also a firstprivate: in that case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C,
                                                               OrigVD]() {
            if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
              Address VDAddr =
                  CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this,
                                                                        OrigVD);
              setAddrOfLocalVar(VD, VDAddr);
              return VDAddr;
            }
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit an implicit barrier if at least one lastprivate conditional is
    // found and this is not simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable of a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const Expr *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit a nowait reduction if the nowait clause is present or the directive
    // is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit the conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to the
/// outlined parallel function. This is necessary for combined constructs such
/// as 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace

static void
checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  // Privates should not be analyzed since they are not captured at all.
  // Task reductions may be skipped - tasks are ignored.
  // Firstprivates do not return a value but may be passed by reference - no
  // need to check for updated lastprivate conditionals.
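  // The loop below only records the firstprivate declarations; unlike the
  // reduction, lastprivate, and linear clauses handled above, no per-reference
  // lastprivate conditional update check is emitted for them.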
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
    }
  }
  CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
      CGF, S, PrivateDecls);
}

static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    llvm::Value *NumThreads =
        CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                           /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond);
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
    // Check if we have any if clause associated with the directive.
    llvm::Value *IfCond = nullptr;
    if (const auto *C = S.getSingleClause<OMPIfClause>())
      IfCond = EmitScalarExpr(C->getCondition(),
                              /*IgnoreResultAssign=*/true);

    llvm::Value *NumThreads = nullptr;
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                  /*IgnoreResultAssign=*/true);

    ProcBindKind ProcBind = OMP_PROC_BIND_default;
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
      ProcBind = ProcBindClause->getProcBindKind();

    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    // The cleanup callback that finalizes all variables at the given location,
    // and thus calls destructors etc.
    auto FiniCB = [this](InsertPointTy IP) {
      CGBuilderTy::InsertPointGuard IPG(Builder);
      assert(IP.getBlock()->end() != IP.getPoint() &&
             "OpenMP IR Builder should cause terminated block!");
      llvm::BasicBlock *IPBB = IP.getBlock();
      llvm::BasicBlock *DestBB = IPBB->splitBasicBlock(IP.getPoint());
      IPBB->getTerminator()->eraseFromParent();
      Builder.SetInsertPoint(IPBB);
      CodeGenFunction::JumpDest Dest = getJumpDestInCurrentScope(DestBB);
      EmitBranchThroughCleanup(Dest);
    };

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();

    auto BodyGenCB = [ParallelRegionBodyStmt,
                      this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                            llvm::BasicBlock &ContinuationBB) {
      auto OldAllocaIP = AllocaInsertPt;
      AllocaInsertPt = &*AllocaIP.getPoint();

      auto OldReturnBlock = ReturnBlock;
      ReturnBlock = getJumpDestInCurrentScope(&ContinuationBB);

      llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
      CodeGenIPBB->splitBasicBlock(CodeGenIP.getPoint());
      llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator();
      CodeGenIPBBTI->removeFromParent();

      Builder.SetInsertPoint(CodeGenIPBB);

      EmitStmt(ParallelRegionBodyStmt);

      Builder.Insert(CodeGenIPBBTI);

      AllocaInsertPt = OldAllocaIP;
      ReturnBlock = OldReturnBlock;
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB,
                                                 FiniCB, IfCond, NumThreads,
                                                 ProcBind, S.hasCancel()));
    return;
  }

  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // when propagating the master thread's values of threadprivate
      // variables to the local instances of those variables in all other
      // implicit threads.
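      // A minimal sketch of the copyin situation (hypothetical source):
      //   int tp;
      //   #pragma omp threadprivate(tp)
      //   ...
      //   #pragma omp parallel copyin(tp)
      // Every implicit thread must observe the master thread's 'tp' before
      // the region body starts, hence the forced barrier emitted below.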
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
                     int MaxLevel, int Level = 0) {
  assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
  const Stmt *SimplifiedS = S->IgnoreContainers();
  if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
    PrettyStackTraceLoc CrashInfo(
        CGF.getContext().getSourceManager(), CS->getLBracLoc(),
        "LLVM IR generation of compound statement ('{}')");

    // Keep track of the current cleanup stack depth, including debug scopes.
    CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
    for (const Stmt *CurStmt : CS->body())
      emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
    return;
  }
  if (SimplifiedS == NextLoop) {
    if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
      S = For->getBody();
    } else {
      assert(isa<CXXForRangeStmt>(SimplifiedS) &&
             "Expected canonical for loop or range-based for loop.");
      const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
      CGF.EmitStmt(CXXFor->getLoopVarStmt());
      S = CXXFor->getBody();
    }
    if (Level + 1 < MaxLevel) {
      NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
          S, /*TryImperfectlyNestedLoops=*/true);
      emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
      return;
    }
  }
  CGF.EmitStmt(S);
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values for the current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(UE);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, so
  // there is no need to generate code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(UE);
    }
  }

  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  for (const Expr *E : D.finals_conditions()) {
    if (!E)
      continue;
    // Check that the loop counter in a non-rectangular nest fits into the
    // iteration space.
    llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
    EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
                         getProfileCount(D.getBody()));
    EmitBlock(NextBB);
  }
  // Emit loop variables for C++ range loops.
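  // For reference, given a collapsed nest such as (hypothetical source):
  //   #pragma omp for collapse(2)
  //   for (int i = 0; i < n; ++i)
  //     for (int j = 0; j < m; ++j)
  //       body(i, j);
  // only 'body(i, j)' is emitted here, while the counter updates above
  // recompute 'i' and 'j' from the single logical iteration variable.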
1607 const Stmt *Body = 1608 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); 1609 // Emit loop body. 1610 emitBody(*this, Body, 1611 OMPLoopDirective::tryToFindNextInnerLoop( 1612 Body, /*TryImperfectlyNestedLoops=*/true), 1613 D.getCollapsedNumber()); 1614 1615 // The end (updates/cleanups). 1616 EmitBlock(Continue.getBlock()); 1617 BreakContinueStack.pop_back(); 1618 } 1619 1620 void CodeGenFunction::EmitOMPInnerLoop( 1621 const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, 1622 const Expr *IncExpr, 1623 const llvm::function_ref<void(CodeGenFunction &)> BodyGen, 1624 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { 1625 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); 1626 1627 // Start the loop with a block that tests the condition. 1628 auto CondBlock = createBasicBlock("omp.inner.for.cond"); 1629 EmitBlock(CondBlock); 1630 const SourceRange R = S.getSourceRange(); 1631 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1632 SourceLocToDebugLoc(R.getEnd())); 1633 1634 // If there are any cleanups between here and the loop-exit scope, 1635 // create a block to stage a loop exit along. 1636 llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); 1637 if (RequiresCleanup) 1638 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); 1639 1640 llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body"); 1641 1642 // Emit condition. 1643 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); 1644 if (ExitBlock != LoopExit.getBlock()) { 1645 EmitBlock(ExitBlock); 1646 EmitBranchThroughCleanup(LoopExit); 1647 } 1648 1649 EmitBlock(LoopBody); 1650 incrementProfileCounter(&S); 1651 1652 // Create a block for the increment. 1653 JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); 1654 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1655 1656 BodyGen(*this); 1657 1658 // Emit "IV = IV + 1" and a back-edge to the condition block. 1659 EmitBlock(Continue.getBlock()); 1660 EmitIgnoredExpr(IncExpr); 1661 PostIncGen(*this); 1662 BreakContinueStack.pop_back(); 1663 EmitBranch(CondBlock); 1664 LoopStack.pop(); 1665 // Emit the fall-through block. 1666 EmitBlock(LoopExit.getBlock()); 1667 } 1668 1669 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { 1670 if (!HaveInsertPoint()) 1671 return false; 1672 // Emit inits for the linear variables. 1673 bool HasLinears = false; 1674 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1675 for (const Expr *Init : C->inits()) { 1676 HasLinears = true; 1677 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); 1678 if (const auto *Ref = 1679 dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { 1680 AutoVarEmission Emission = EmitAutoVarAlloca(*VD); 1681 const auto *OrigVD = cast<VarDecl>(Ref->getDecl()); 1682 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), 1683 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1684 VD->getInit()->getType(), VK_LValue, 1685 VD->getInit()->getExprLoc()); 1686 EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), 1687 VD->getType()), 1688 /*capturedByInit=*/false); 1689 EmitAutoVarCleanups(Emission); 1690 } else { 1691 EmitVarDecl(*VD); 1692 } 1693 } 1694 // Emit the linear steps for the linear clauses. 1695 // If a step is not constant, it is pre-calculated before the loop. 
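    // e.g. for a hypothetical '#pragma omp simd linear(p : s)' with a
    // non-constant step 's', Sema provides an assignment of 's' to a saved
    // step variable; both the variable and the calculation are emitted here.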
    if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}

void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (const Expr *F : C->finals()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If this is the first post-update expression found, emit the
          // conditional block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    llvm::APInt ClauseAlignment(64, 0);
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
      auto *AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = AlignmentCI->getValue();
    }
    for (const Expr *E : Clause->varlists()) {
      llvm::APInt Alignment(ClauseAlignment);
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
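        // e.g. a bare '#pragma omp simd aligned(p)' takes this path and
        // assumes the target's default SIMD alignment for p's pointee type.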
1761 Alignment = 1762 CGF.getContext() 1763 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( 1764 E->getType()->getPointeeType())) 1765 .getQuantity(); 1766 } 1767 assert((Alignment == 0 || Alignment.isPowerOf2()) && 1768 "alignment is not power of 2"); 1769 if (Alignment != 0) { 1770 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 1771 CGF.EmitAlignmentAssumption( 1772 PtrValue, E, /*No second loc needed*/ SourceLocation(), 1773 llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment)); 1774 } 1775 } 1776 } 1777 } 1778 1779 void CodeGenFunction::EmitOMPPrivateLoopCounters( 1780 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { 1781 if (!HaveInsertPoint()) 1782 return; 1783 auto I = S.private_counters().begin(); 1784 for (const Expr *E : S.counters()) { 1785 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1786 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 1787 // Emit var without initialization. 1788 AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD); 1789 EmitAutoVarCleanups(VarEmission); 1790 LocalDeclMap.erase(PrivateVD); 1791 (void)LoopScope.addPrivate(VD, [&VarEmission]() { 1792 return VarEmission.getAllocatedAddress(); 1793 }); 1794 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || 1795 VD->hasGlobalStorage()) { 1796 (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() { 1797 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), 1798 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), 1799 E->getType(), VK_LValue, E->getExprLoc()); 1800 return EmitLValue(&DRE).getAddress(*this); 1801 }); 1802 } else { 1803 (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() { 1804 return VarEmission.getAllocatedAddress(); 1805 }); 1806 } 1807 ++I; 1808 } 1809 // Privatize extra loop counters used in loops for ordered(n) clauses. 1810 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) { 1811 if (!C->getNumForLoops()) 1812 continue; 1813 for (unsigned I = S.getCollapsedNumber(), 1814 E = C->getLoopNumIterations().size(); 1815 I < E; ++I) { 1816 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I)); 1817 const auto *VD = cast<VarDecl>(DRE->getDecl()); 1818 // Override only those variables that can be captured to avoid re-emission 1819 // of the variables declared within the loops. 1820 if (DRE->refersToEnclosingVariableOrCapture()) { 1821 (void)LoopScope.addPrivate(VD, [this, DRE, VD]() { 1822 return CreateMemTemp(DRE->getType(), VD->getName()); 1823 }); 1824 } 1825 } 1826 } 1827 } 1828 1829 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 1830 const Expr *Cond, llvm::BasicBlock *TrueBlock, 1831 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 1832 if (!CGF.HaveInsertPoint()) 1833 return; 1834 { 1835 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 1836 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); 1837 (void)PreCondScope.Privatize(); 1838 // Get initial values of real counters. 1839 for (const Expr *I : S.inits()) { 1840 CGF.EmitIgnoredExpr(I); 1841 } 1842 } 1843 // Create temp loop control variables with their init values to support 1844 // non-rectangular loops. 
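  // An illustrative non-rectangular nest (OpenMP 5.0, hypothetical source):
  //   #pragma omp for collapse(2)
  //   for (int i = 0; i < n; ++i)
  //     for (int j = i; j < n; ++j) ...
  // Here 'j' is a dependent counter: its bounds depend on 'i', so the
  // precondition must be evaluated with the temporaries created below.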
1845 CodeGenFunction::OMPMapVars PreCondVars; 1846 for (const Expr * E: S.dependent_counters()) { 1847 if (!E) 1848 continue; 1849 assert(!E->getType().getNonReferenceType()->isRecordType() && 1850 "dependent counter must not be an iterator."); 1851 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1852 Address CounterAddr = 1853 CGF.CreateMemTemp(VD->getType().getNonReferenceType()); 1854 (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr); 1855 } 1856 (void)PreCondVars.apply(CGF); 1857 for (const Expr *E : S.dependent_inits()) { 1858 if (!E) 1859 continue; 1860 CGF.EmitIgnoredExpr(E); 1861 } 1862 // Check that loop is executed at least one time. 1863 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); 1864 PreCondVars.restore(CGF); 1865 } 1866 1867 void CodeGenFunction::EmitOMPLinearClause( 1868 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { 1869 if (!HaveInsertPoint()) 1870 return; 1871 llvm::DenseSet<const VarDecl *> SIMDLCVs; 1872 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 1873 const auto *LoopDirective = cast<OMPLoopDirective>(&D); 1874 for (const Expr *C : LoopDirective->counters()) { 1875 SIMDLCVs.insert( 1876 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 1877 } 1878 } 1879 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1880 auto CurPrivate = C->privates().begin(); 1881 for (const Expr *E : C->varlists()) { 1882 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1883 const auto *PrivateVD = 1884 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); 1885 if (!SIMDLCVs.count(VD->getCanonicalDecl())) { 1886 bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() { 1887 // Emit private VarDecl with copy init. 1888 EmitVarDecl(*PrivateVD); 1889 return GetAddrOfLocalVar(PrivateVD); 1890 }); 1891 assert(IsRegistered && "linear var already registered as private"); 1892 // Silence the warning about unused variable. 1893 (void)IsRegistered; 1894 } else { 1895 EmitVarDecl(*PrivateVD); 1896 } 1897 ++CurPrivate; 1898 } 1899 } 1900 } 1901 1902 static void emitSimdlenSafelenClause(CodeGenFunction &CGF, 1903 const OMPExecutableDirective &D, 1904 bool IsMonotonic) { 1905 if (!CGF.HaveInsertPoint()) 1906 return; 1907 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { 1908 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 1909 /*ignoreResult=*/true); 1910 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1911 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1912 // In presence of finite 'safelen', it may be unsafe to mark all 1913 // the memory instructions parallel, because loop-carried 1914 // dependences of 'safelen' iterations are possible. 1915 if (!IsMonotonic) 1916 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); 1917 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { 1918 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), 1919 /*ignoreResult=*/true); 1920 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1921 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1922 // In presence of finite 'safelen', it may be unsafe to mark all 1923 // the memory instructions parallel, because loop-carried 1924 // dependences of 'safelen' iterations are possible. 
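    // e.g. '#pragma omp simd safelen(8)' allows loop-carried dependences at
    // a distance of 8 or more iterations, so the accesses must not be marked
    // parallel even though a vector width of 8 is still requested.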
    CGF.LoopStack.setParallel(/*Enable=*/false);
  }
}

void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/simdlen/order clauses.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable();
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
  if (const auto *C = D.getSingleClause<OMPOrderClause>())
    if (C->getKind() == OMPC_ORDER_concurrent)
      LoopStack.setParallel(/*Enable=*/true);
}

void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (const Expr *F : D.finals()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If this is the first post-update expression found, emit the
          // conditional block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED) {
        OrigAddr =
            EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
      } else {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress(*this);
      }
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}

/// Emit a helper variable and return the corresponding lvalue.
1994 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 1995 const DeclRefExpr *Helper) { 1996 auto VDecl = cast<VarDecl>(Helper->getDecl()); 1997 CGF.EmitVarDecl(*VDecl); 1998 return CGF.EmitLValue(Helper); 1999 } 2000 2001 static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S, 2002 const RegionCodeGenTy &SimdInitGen, 2003 const RegionCodeGenTy &BodyCodeGen) { 2004 auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF, 2005 PrePostActionTy &) { 2006 CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S); 2007 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 2008 SimdInitGen(CGF); 2009 2010 BodyCodeGen(CGF); 2011 }; 2012 auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 2013 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 2014 CGF.LoopStack.setVectorizeEnable(/*Enable=*/false); 2015 2016 BodyCodeGen(CGF); 2017 }; 2018 const Expr *IfCond = nullptr; 2019 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2020 if (CGF.getLangOpts().OpenMP >= 50 && 2021 (C->getNameModifier() == OMPD_unknown || 2022 C->getNameModifier() == OMPD_simd)) { 2023 IfCond = C->getCondition(); 2024 break; 2025 } 2026 } 2027 if (IfCond) { 2028 CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2029 } else { 2030 RegionCodeGenTy ThenRCG(ThenGen); 2031 ThenRCG(CGF); 2032 } 2033 } 2034 2035 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, 2036 PrePostActionTy &Action) { 2037 Action.Enter(CGF); 2038 assert(isOpenMPSimdDirective(S.getDirectiveKind()) && 2039 "Expected simd directive"); 2040 OMPLoopScope PreInitScope(CGF, S); 2041 // if (PreCond) { 2042 // for (IV in 0..LastIteration) BODY; 2043 // <Final counter/linear vars updates>; 2044 // } 2045 // 2046 if (isOpenMPDistributeDirective(S.getDirectiveKind()) || 2047 isOpenMPWorksharingDirective(S.getDirectiveKind()) || 2048 isOpenMPTaskLoopDirective(S.getDirectiveKind())) { 2049 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable())); 2050 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable())); 2051 } 2052 2053 // Emit: if (PreCond) - begin. 2054 // If the condition constant folds and can be elided, avoid emitting the 2055 // whole loop. 2056 bool CondConstant; 2057 llvm::BasicBlock *ContBlock = nullptr; 2058 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2059 if (!CondConstant) 2060 return; 2061 } else { 2062 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then"); 2063 ContBlock = CGF.createBasicBlock("simd.if.end"); 2064 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 2065 CGF.getProfileCount(&S)); 2066 CGF.EmitBlock(ThenBlock); 2067 CGF.incrementProfileCounter(&S); 2068 } 2069 2070 // Emit the loop iteration variable. 2071 const Expr *IVExpr = S.getIterationVariable(); 2072 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 2073 CGF.EmitVarDecl(*IVDecl); 2074 CGF.EmitIgnoredExpr(S.getInit()); 2075 2076 // Emit the iterations count variable. 2077 // If it is not a variable, Sema decided to calculate iterations count on 2078 // each iteration (e.g., it is foldable into a constant). 2079 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2080 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2081 // Emit calculation of the iterations count. 
2082 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 2083 } 2084 2085 emitAlignedClause(CGF, S); 2086 (void)CGF.EmitOMPLinearClauseInit(S); 2087 { 2088 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2089 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 2090 CGF.EmitOMPLinearClause(S, LoopScope); 2091 CGF.EmitOMPPrivateClause(S, LoopScope); 2092 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2093 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( 2094 CGF, S, CGF.EmitLValue(S.getIterationVariable())); 2095 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2096 (void)LoopScope.Privatize(); 2097 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 2098 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 2099 2100 emitCommonSimdLoop( 2101 CGF, S, 2102 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2103 CGF.EmitOMPSimdInit(S); 2104 }, 2105 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 2106 CGF.EmitOMPInnerLoop( 2107 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), 2108 [&S](CodeGenFunction &CGF) { 2109 CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); 2110 CGF.EmitStopPoint(&S); 2111 }, 2112 [](CodeGenFunction &) {}); 2113 }); 2114 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; }); 2115 // Emit final copy of the lastprivate variables at the end of loops. 2116 if (HasLastprivateClause) 2117 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 2118 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); 2119 emitPostUpdateForReductionClause(CGF, S, 2120 [](CodeGenFunction &) { return nullptr; }); 2121 } 2122 CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; }); 2123 // Emit: if (PreCond) - end. 2124 if (ContBlock) { 2125 CGF.EmitBranch(ContBlock); 2126 CGF.EmitBlock(ContBlock, true); 2127 } 2128 } 2129 2130 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 2131 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2132 emitOMPSimdRegion(CGF, S, Action); 2133 }; 2134 { 2135 auto LPCRegion = 2136 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 2137 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2138 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2139 } 2140 // Check for outer lastprivate conditional update. 2141 checkForLastprivateConditionalUpdate(*this, S); 2142 } 2143 2144 void CodeGenFunction::EmitOMPOuterLoop( 2145 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 2146 CodeGenFunction::OMPPrivateScope &LoopScope, 2147 const CodeGenFunction::OMPLoopArguments &LoopArgs, 2148 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 2149 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { 2150 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2151 2152 const Expr *IVExpr = S.getIterationVariable(); 2153 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2154 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2155 2156 JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 2157 2158 // Start the loop with a block that tests the condition. 
2159 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond"); 2160 EmitBlock(CondBlock); 2161 const SourceRange R = S.getSourceRange(); 2162 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 2163 SourceLocToDebugLoc(R.getEnd())); 2164 2165 llvm::Value *BoolCondVal = nullptr; 2166 if (!DynamicOrOrdered) { 2167 // UB = min(UB, GlobalUB) or 2168 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 2169 // 'distribute parallel for') 2170 EmitIgnoredExpr(LoopArgs.EUB); 2171 // IV = LB 2172 EmitIgnoredExpr(LoopArgs.Init); 2173 // IV < UB 2174 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); 2175 } else { 2176 BoolCondVal = 2177 RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL, 2178 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); 2179 } 2180 2181 // If there are any cleanups between here and the loop-exit scope, 2182 // create a block to stage a loop exit along. 2183 llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); 2184 if (LoopScope.requiresCleanups()) 2185 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 2186 2187 llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body"); 2188 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 2189 if (ExitBlock != LoopExit.getBlock()) { 2190 EmitBlock(ExitBlock); 2191 EmitBranchThroughCleanup(LoopExit); 2192 } 2193 EmitBlock(LoopBody); 2194 2195 // Emit "IV = LB" (in case of static schedule, we have already calculated new 2196 // LB for loop condition and emitted it above). 2197 if (DynamicOrOrdered) 2198 EmitIgnoredExpr(LoopArgs.Init); 2199 2200 // Create a block for the increment. 2201 JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 2202 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 2203 2204 emitCommonSimdLoop( 2205 *this, S, 2206 [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { 2207 // Generate !llvm.loop.parallel metadata for loads and stores for loops 2208 // with dynamic/guided scheduling and without ordered clause. 2209 if (!isOpenMPSimdDirective(S.getDirectiveKind())) { 2210 CGF.LoopStack.setParallel(!IsMonotonic); 2211 if (const auto *C = S.getSingleClause<OMPOrderClause>()) 2212 if (C->getKind() == OMPC_ORDER_concurrent) 2213 CGF.LoopStack.setParallel(/*Enable=*/true); 2214 } else { 2215 CGF.EmitOMPSimdInit(S, IsMonotonic); 2216 } 2217 }, 2218 [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, 2219 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 2220 SourceLocation Loc = S.getBeginLoc(); 2221 // when 'distribute' is not combined with a 'for': 2222 // while (idx <= UB) { BODY; ++idx; } 2223 // when 'distribute' is combined with a 'for' 2224 // (e.g. 'distribute parallel for') 2225 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 2226 CGF.EmitOMPInnerLoop( 2227 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, 2228 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 2229 CodeGenLoop(CGF, S, LoopExit); 2230 }, 2231 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { 2232 CodeGenOrdered(CGF, Loc, IVSize, IVSigned); 2233 }); 2234 }); 2235 2236 EmitBlock(Continue.getBlock()); 2237 BreakContinueStack.pop_back(); 2238 if (!DynamicOrOrdered) { 2239 // Emit "LB = LB + Stride", "UB = UB + Stride". 2240 EmitIgnoredExpr(LoopArgs.NextLB); 2241 EmitIgnoredExpr(LoopArgs.NextUB); 2242 } 2243 2244 EmitBranch(CondBlock); 2245 LoopStack.pop(); 2246 // Emit the fall-through block. 2247 EmitBlock(LoopExit.getBlock()); 2248 2249 // Tell the runtime we are done. 
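  // With the libomp entry points this is a __kmpc_for_static_fini call for
  // static schedules; dynamic and ordered schedules need no explicit finish
  // here because the dispatch calls above do their own bookkeeping.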
2250 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { 2251 if (!DynamicOrOrdered) 2252 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 2253 S.getDirectiveKind()); 2254 }; 2255 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2256 } 2257 2258 void CodeGenFunction::EmitOMPForOuterLoop( 2259 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 2260 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 2261 const OMPLoopArguments &LoopArgs, 2262 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 2263 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2264 2265 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 2266 const bool DynamicOrOrdered = 2267 Ordered || RT.isDynamic(ScheduleKind.Schedule); 2268 2269 assert((Ordered || 2270 !RT.isStaticNonchunked(ScheduleKind.Schedule, 2271 LoopArgs.Chunk != nullptr)) && 2272 "static non-chunked schedule does not need outer loop"); 2273 2274 // Emit outer loop. 2275 // 2276 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2277 // When schedule(dynamic,chunk_size) is specified, the iterations are 2278 // distributed to threads in the team in chunks as the threads request them. 2279 // Each thread executes a chunk of iterations, then requests another chunk, 2280 // until no chunks remain to be distributed. Each chunk contains chunk_size 2281 // iterations, except for the last chunk to be distributed, which may have 2282 // fewer iterations. When no chunk_size is specified, it defaults to 1. 2283 // 2284 // When schedule(guided,chunk_size) is specified, the iterations are assigned 2285 // to threads in the team in chunks as the executing threads request them. 2286 // Each thread executes a chunk of iterations, then requests another chunk, 2287 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 2288 // each chunk is proportional to the number of unassigned iterations divided 2289 // by the number of threads in the team, decreasing to 1. For a chunk_size 2290 // with value k (greater than 1), the size of each chunk is determined in the 2291 // same way, with the restriction that the chunks do not contain fewer than k 2292 // iterations (except for the last chunk to be assigned, which may have fewer 2293 // than k iterations). 2294 // 2295 // When schedule(auto) is specified, the decision regarding scheduling is 2296 // delegated to the compiler and/or runtime system. The programmer gives the 2297 // implementation the freedom to choose any possible mapping of iterations to 2298 // threads in the team. 2299 // 2300 // When schedule(runtime) is specified, the decision regarding scheduling is 2301 // deferred until run time, and the schedule and chunk size are taken from the 2302 // run-sched-var ICV. If the ICV is set to auto, the schedule is 2303 // implementation defined 2304 // 2305 // while(__kmpc_dispatch_next(&LB, &UB)) { 2306 // idx = LB; 2307 // while (idx <= UB) { BODY; ++idx; 2308 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. 2309 // } // inner loop 2310 // } 2311 // 2312 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2313 // When schedule(static, chunk_size) is specified, iterations are divided into 2314 // chunks of size chunk_size, and the chunks are assigned to the threads in 2315 // the team in a round-robin fashion in the order of the thread number. 
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
        CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
                                                              LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DispatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);
  }

  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}

static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}

void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
  // dynamic.

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);

  // For combined 'distribute' and 'for' the increment expression of the
  // 'distribute' loop is stored in DistInc. For 'distribute' alone, it is in
  // Inc.
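  // Rough sketch of the combined case (e.g. 'distribute parallel for'):
  //   distribute: advances its [LB, UB] chunk by DistInc each round;
  //   inner 'for': workshares the current chunk among the team's threads.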
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();

  // This routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive.
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}

static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute parallel
  // for') we need to use the 'distribute' chunk lower and upper bounds rather
  // than the whole loop iteration space. These are parameters to the outlined
  // function for 'parallel' and we copy the bounds of the previous schedule
  // into the current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevUpperBoundVariable()->getExprLoc());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}

/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then we
/// need to use the LB and UB expressions generated by the worksharing code
/// generation support, whereas in non-combined situations we would just emit
/// 0 and the LastIteration expression.
/// This function is necessary due to the difference between the LB and UB
/// types for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // When implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop is not
  // normalized as each team only executes its own assigned distribute chunk.
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal =
      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  llvm::Value *UBVal =
      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  return {LBVal, UBVal};
}

static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  llvm::Value *LBCast =
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
                                CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));

  llvm::Value *UBCast =
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
                                CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}

static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &Action) {
    Action.Enter(CGF);
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S,
      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}

void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  // Emit SPMD target simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace

bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
2617 EmitIgnoredExpr(S.getCalcLastIteration()); 2618 } 2619 2620 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2621 2622 bool HasLastprivateClause; 2623 // Check pre-condition. 2624 { 2625 OMPLoopScope PreInitScope(*this, S); 2626 // Skip the entire loop if we don't meet the precondition. 2627 // If the condition constant folds and can be elided, avoid emitting the 2628 // whole loop. 2629 bool CondConstant; 2630 llvm::BasicBlock *ContBlock = nullptr; 2631 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2632 if (!CondConstant) 2633 return false; 2634 } else { 2635 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then"); 2636 ContBlock = createBasicBlock("omp.precond.end"); 2637 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2638 getProfileCount(&S)); 2639 EmitBlock(ThenBlock); 2640 incrementProfileCounter(&S); 2641 } 2642 2643 RunCleanupsScope DoacrossCleanupScope(*this); 2644 bool Ordered = false; 2645 if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 2646 if (OrderedClause->getNumForLoops()) 2647 RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations()); 2648 else 2649 Ordered = true; 2650 } 2651 2652 llvm::DenseSet<const Expr *> EmittedFinals; 2653 emitAlignedClause(*this, S); 2654 bool HasLinears = EmitOMPLinearClauseInit(S); 2655 // Emit helper vars inits. 2656 2657 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); 2658 LValue LB = Bounds.first; 2659 LValue UB = Bounds.second; 2660 LValue ST = 2661 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2662 LValue IL = 2663 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2664 2665 // Emit 'then' code. 2666 { 2667 OMPPrivateScope LoopScope(*this); 2668 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) { 2669 // Emit implicit barrier to synchronize threads and avoid data races on 2670 // initialization of firstprivate variables and post-update of 2671 // lastprivate variables. 2672 CGM.getOpenMPRuntime().emitBarrierCall( 2673 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 2674 /*ForceSimpleCall=*/true); 2675 } 2676 EmitOMPPrivateClause(S, LoopScope); 2677 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( 2678 *this, S, EmitLValue(S.getIterationVariable())); 2679 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2680 EmitOMPReductionClauseInit(S, LoopScope); 2681 EmitOMPPrivateLoopCounters(S, LoopScope); 2682 EmitOMPLinearClause(S, LoopScope); 2683 (void)LoopScope.Privatize(); 2684 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 2685 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); 2686 2687 // Detect the loop schedule kind and chunk. 2688 const Expr *ChunkExpr = nullptr; 2689 OpenMPScheduleTy ScheduleKind; 2690 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { 2691 ScheduleKind.Schedule = C->getScheduleKind(); 2692 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2693 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2694 ChunkExpr = C->getChunkSize(); 2695 } else { 2696 // Default behaviour for schedule clause. 
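        // Without a schedule clause this typically behaves like
        // schedule(static) with no chunk, though offloading targets may
        // choose a different default.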
2697 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk( 2698 *this, S, ScheduleKind.Schedule, ChunkExpr); 2699 } 2700 bool HasChunkSizeOne = false; 2701 llvm::Value *Chunk = nullptr; 2702 if (ChunkExpr) { 2703 Chunk = EmitScalarExpr(ChunkExpr); 2704 Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(), 2705 S.getIterationVariable()->getType(), 2706 S.getBeginLoc()); 2707 Expr::EvalResult Result; 2708 if (ChunkExpr->EvaluateAsInt(Result, getContext())) { 2709 llvm::APSInt EvaluatedChunk = Result.Val.getInt(); 2710 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1); 2711 } 2712 } 2713 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2714 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2715 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2716 // If the static schedule kind is specified or if the ordered clause is 2717 // specified, and if no monotonic modifier is specified, the effect will 2718 // be as if the monotonic modifier was specified. 2719 bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, 2720 /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && 2721 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); 2722 if ((RT.isStaticNonchunked(ScheduleKind.Schedule, 2723 /* Chunked */ Chunk != nullptr) || 2724 StaticChunkedOne) && 2725 !Ordered) { 2726 JumpDest LoopExit = 2727 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2728 emitCommonSimdLoop( 2729 *this, S, 2730 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2731 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2732 CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2733 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { 2734 if (C->getKind() == OMPC_ORDER_concurrent) 2735 CGF.LoopStack.setParallel(/*Enable=*/true); 2736 } 2737 }, 2738 [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk, 2739 &S, ScheduleKind, LoopExit, 2740 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 2741 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2742 // When no chunk_size is specified, the iteration space is divided 2743 // into chunks that are approximately equal in size, and at most 2744 // one chunk is distributed to each thread. Note that the size of 2745 // the chunks is unspecified in this case. 2746 CGOpenMPRuntime::StaticRTInput StaticInit( 2747 IVSize, IVSigned, Ordered, IL.getAddress(CGF), 2748 LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF), 2749 StaticChunkedOne ? Chunk : nullptr); 2750 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 2751 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, 2752 StaticInit); 2753 // UB = min(UB, GlobalUB); 2754 if (!StaticChunkedOne) 2755 CGF.EmitIgnoredExpr(S.getEnsureUpperBound()); 2756 // IV = LB; 2757 CGF.EmitIgnoredExpr(S.getInit()); 2758 // For unchunked static schedule generate: 2759 // 2760 // while (idx <= UB) { 2761 // BODY; 2762 // ++idx; 2763 // } 2764 // 2765 // For static schedule with chunk one: 2766 // 2767 // while (IV <= PrevUB) { 2768 // BODY; 2769 // IV += ST; 2770 // } 2771 CGF.EmitOMPInnerLoop( 2772 S, LoopScope.requiresCleanups(), 2773 StaticChunkedOne ? S.getCombinedParForInDistCond() 2774 : S.getCond(), 2775 StaticChunkedOne ? S.getDistInc() : S.getInc(), 2776 [&S, LoopExit](CodeGenFunction &CGF) { 2777 CGF.EmitOMPLoopBody(S, LoopExit); 2778 CGF.EmitStopPoint(&S); 2779 }, 2780 [](CodeGenFunction &) {}); 2781 }); 2782 EmitBlock(LoopExit.getBlock()); 2783 // Tell the runtime we are done. 
2784 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2785 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 2786 S.getDirectiveKind()); 2787 }; 2788 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2789 } else { 2790 const bool IsMonotonic = 2791 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || 2792 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || 2793 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 2794 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 2795 // Emit the outer loop, which requests its work chunk [LB..UB] from 2796 // runtime and runs the inner loop to process it. 2797 const OMPLoopArguments LoopArguments( 2798 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), 2799 IL.getAddress(*this), Chunk, EUB); 2800 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 2801 LoopArguments, CGDispatchBounds); 2802 } 2803 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2804 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { 2805 return CGF.Builder.CreateIsNotNull( 2806 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 2807 }); 2808 } 2809 EmitOMPReductionClauseFinal( 2810 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 2811 ? /*Parallel and Simd*/ OMPD_parallel_for_simd 2812 : /*Parallel only*/ OMPD_parallel); 2813 // Emit post-update of the reduction variables if IsLastIter != 0. 2814 emitPostUpdateForReductionClause( 2815 *this, S, [IL, &S](CodeGenFunction &CGF) { 2816 return CGF.Builder.CreateIsNotNull( 2817 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 2818 }); 2819 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2820 if (HasLastprivateClause) 2821 EmitOMPLastprivateClauseFinal( 2822 S, isOpenMPSimdDirective(S.getDirectiveKind()), 2823 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); 2824 } 2825 EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) { 2826 return CGF.Builder.CreateIsNotNull( 2827 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 2828 }); 2829 DoacrossCleanupScope.ForceCleanup(); 2830 // We're now done with the loop, so jump to the continuation block. 2831 if (ContBlock) { 2832 EmitBranch(ContBlock); 2833 EmitBlock(ContBlock, /*IsFinished=*/true); 2834 } 2835 } 2836 return HasLastprivateClause; 2837 } 2838 2839 /// The following two functions generate expressions for the loop lower 2840 /// and upper bounds in case of static and dynamic (dispatch) schedule 2841 /// of the associated 'for' or 'distribute' loop. 2842 static std::pair<LValue, LValue> 2843 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 2844 const auto &LS = cast<OMPLoopDirective>(S); 2845 LValue LB = 2846 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2847 LValue UB = 2848 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2849 return {LB, UB}; 2850 } 2851 2852 /// When dealing with dispatch schedules (e.g. 
dynamic, guided), we do not
2853 /// consider the lower and upper bound expressions generated by the
2854 /// worksharing loop support; instead, we use 0 and the iteration space
2855 /// size as constants.
2856 static std::pair<llvm::Value *, llvm::Value *>
2857 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2858 Address LB, Address UB) {
2859 const auto &LS = cast<OMPLoopDirective>(S);
2860 const Expr *IVExpr = LS.getIterationVariable();
2861 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2862 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2863 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2864 return {LBVal, UBVal};
2865 }
2866 
2867 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2868 bool HasLastprivates = false;
2869 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2870 PrePostActionTy &) {
2871 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2872 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2873 emitForLoopBounds,
2874 emitDispatchForLoopBounds);
2875 };
2876 {
2877 auto LPCRegion =
2878 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2879 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2880 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2881 S.hasCancel());
2882 }
2883 
2884 // Emit an implicit barrier at the end.
2885 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
2886 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
2887 // Check for outer lastprivate conditional update.
2888 checkForLastprivateConditionalUpdate(*this, S);
2889 }
2890 
2891 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2892 bool HasLastprivates = false;
2893 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2894 PrePostActionTy &) {
2895 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2896 emitForLoopBounds,
2897 emitDispatchForLoopBounds);
2898 };
2899 {
2900 auto LPCRegion =
2901 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2902 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2903 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2904 }
2905 
2906 // Emit an implicit barrier at the end.
2907 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
2908 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
2909 // Check for outer lastprivate conditional update.
2910 checkForLastprivateConditionalUpdate(*this, S);
2911 }
2912 
2913 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2914 const Twine &Name,
2915 llvm::Value *Init = nullptr) {
2916 LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2917 if (Init)
2918 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2919 return LVal;
2920 }
2921 
2922 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
2923 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
2924 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
2925 bool HasLastprivates = false;
2926 auto &&CodeGen = [&S, CapturedStmt, CS,
2927 &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
2928 ASTContext &C = CGF.getContext();
2929 QualType KmpInt32Ty =
2930 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2931 // Emit helper vars inits.
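// Illustrative example (assumed user code, not taken from this file):
//
//   #pragma omp sections
//   {
//     #pragma omp section
//       foo();
//     #pragma omp section
//       bar();
//   }
//
// is lowered as a statically scheduled loop over the section index
// iv in [0, NumSections - 1], where each iteration dispatches on iv through
// the switch emitted below. The helper variables created here (lb, ub, st,
// il) are the lower bound, upper bound, stride and is-last flag of that
// implicit loop.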
2932 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 2933 CGF.Builder.getInt32(0)); 2934 llvm::ConstantInt *GlobalUBVal = CS != nullptr 2935 ? CGF.Builder.getInt32(CS->size() - 1) 2936 : CGF.Builder.getInt32(0); 2937 LValue UB = 2938 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 2939 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 2940 CGF.Builder.getInt32(1)); 2941 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 2942 CGF.Builder.getInt32(0)); 2943 // Loop counter. 2944 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 2945 OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 2946 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 2947 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 2948 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 2949 // Generate condition for loop. 2950 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 2951 OK_Ordinary, S.getBeginLoc(), FPOptions()); 2952 // Increment for loop counter. 2953 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 2954 S.getBeginLoc(), true); 2955 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { 2956 // Iterate through all sections and emit a switch construct: 2957 // switch (IV) { 2958 // case 0: 2959 // <SectionStmt[0]>; 2960 // break; 2961 // ... 2962 // case <NumSection> - 1: 2963 // <SectionStmt[<NumSection> - 1]>; 2964 // break; 2965 // } 2966 // .omp.sections.exit: 2967 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 2968 llvm::SwitchInst *SwitchStmt = 2969 CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()), 2970 ExitBB, CS == nullptr ? 1 : CS->size()); 2971 if (CS) { 2972 unsigned CaseNumber = 0; 2973 for (const Stmt *SubStmt : CS->children()) { 2974 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2975 CGF.EmitBlock(CaseBB); 2976 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 2977 CGF.EmitStmt(SubStmt); 2978 CGF.EmitBranch(ExitBB); 2979 ++CaseNumber; 2980 } 2981 } else { 2982 llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2983 CGF.EmitBlock(CaseBB); 2984 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 2985 CGF.EmitStmt(CapturedStmt); 2986 CGF.EmitBranch(ExitBB); 2987 } 2988 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 2989 }; 2990 2991 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2992 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 2993 // Emit implicit barrier to synchronize threads and avoid data races on 2994 // initialization of firstprivate variables and post-update of lastprivate 2995 // variables. 2996 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 2997 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 2998 /*ForceSimpleCall=*/true); 2999 } 3000 CGF.EmitOMPPrivateClause(S, LoopScope); 3001 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV); 3002 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 3003 CGF.EmitOMPReductionClauseInit(S, LoopScope); 3004 (void)LoopScope.Privatize(); 3005 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 3006 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 3007 3008 // Emit static non-chunked loop. 
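// Rough shape of the runtime call produced by the static init below (a
// sketch with placeholder names "loc" and "tid"; the real call is built by
// CGOpenMPRuntime::emitForStaticInit):
//   __kmpc_for_static_init_4(&loc, tid, /*schedtype=*/kmp_sch_static,
//                            &il, &lb, &ub, &st, /*incr=*/1, /*chunk=*/1);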
3009 OpenMPScheduleTy ScheduleKind; 3010 ScheduleKind.Schedule = OMPC_SCHEDULE_static; 3011 CGOpenMPRuntime::StaticRTInput StaticInit( 3012 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF), 3013 LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF)); 3014 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 3015 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit); 3016 // UB = min(UB, GlobalUB); 3017 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc()); 3018 llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect( 3019 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 3020 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 3021 // IV = LB; 3022 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV); 3023 // while (idx <= UB) { BODY; ++idx; } 3024 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, 3025 [](CodeGenFunction &) {}); 3026 // Tell the runtime we are done. 3027 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 3028 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 3029 S.getDirectiveKind()); 3030 }; 3031 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); 3032 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 3033 // Emit post-update of the reduction variables if IsLastIter != 0. 3034 emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) { 3035 return CGF.Builder.CreateIsNotNull( 3036 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 3037 }); 3038 3039 // Emit final copy of the lastprivate variables if IsLastIter != 0. 3040 if (HasLastprivates) 3041 CGF.EmitOMPLastprivateClauseFinal( 3042 S, /*NoFinals=*/false, 3043 CGF.Builder.CreateIsNotNull( 3044 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()))); 3045 }; 3046 3047 bool HasCancel = false; 3048 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 3049 HasCancel = OSD->hasCancel(); 3050 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 3051 HasCancel = OPSD->hasCancel(); 3052 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); 3053 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 3054 HasCancel); 3055 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 3056 // clause. Otherwise the barrier will be generated by the codegen for the 3057 // directive. 3058 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 3059 // Emit implicit barrier to synchronize threads and avoid data races on 3060 // initialization of firstprivate variables. 3061 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), 3062 OMPD_unknown); 3063 } 3064 } 3065 3066 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 3067 { 3068 auto LPCRegion = 3069 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 3070 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3071 EmitSections(S); 3072 } 3073 // Emit an implicit barrier at the end. 3074 if (!S.getSingleClause<OMPNowaitClause>()) { 3075 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), 3076 OMPD_sections); 3077 } 3078 // Check for outer lastprivate conditional update. 
3079 checkForLastprivateConditionalUpdate(*this, S); 3080 } 3081 3082 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 3083 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3084 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 3085 }; 3086 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3087 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 3088 S.hasCancel()); 3089 } 3090 3091 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 3092 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 3093 llvm::SmallVector<const Expr *, 8> DestExprs; 3094 llvm::SmallVector<const Expr *, 8> SrcExprs; 3095 llvm::SmallVector<const Expr *, 8> AssignmentOps; 3096 // Check if there are any 'copyprivate' clauses associated with this 3097 // 'single' construct. 3098 // Build a list of copyprivate variables along with helper expressions 3099 // (<source>, <destination>, <destination>=<source> expressions) 3100 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 3101 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 3102 DestExprs.append(C->destination_exprs().begin(), 3103 C->destination_exprs().end()); 3104 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 3105 AssignmentOps.append(C->assignment_ops().begin(), 3106 C->assignment_ops().end()); 3107 } 3108 // Emit code for 'single' region along with 'copyprivate' clauses 3109 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3110 Action.Enter(CGF); 3111 OMPPrivateScope SingleScope(CGF); 3112 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 3113 CGF.EmitOMPPrivateClause(S, SingleScope); 3114 (void)SingleScope.Privatize(); 3115 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 3116 }; 3117 { 3118 auto LPCRegion = 3119 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 3120 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3121 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(), 3122 CopyprivateVars, DestExprs, 3123 SrcExprs, AssignmentOps); 3124 } 3125 // Emit an implicit barrier at the end (to avoid data race on firstprivate 3126 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 3127 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 3128 CGM.getOpenMPRuntime().emitBarrierCall( 3129 *this, S.getBeginLoc(), 3130 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); 3131 } 3132 // Check for outer lastprivate conditional update. 
3133 checkForLastprivateConditionalUpdate(*this, S);
3134 }
3135 
3136 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3137 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3138 Action.Enter(CGF);
3139 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
3140 };
3141 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
3142 }
3143 
3144 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
3145 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3146 emitMaster(*this, S);
3147 }
3148 
3149 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
3150 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3151 Action.Enter(CGF);
3152 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
3153 };
3154 const Expr *Hint = nullptr;
3155 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
3156 Hint = HintClause->getHint();
3157 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3158 CGM.getOpenMPRuntime().emitCriticalRegion(*this,
3159 S.getDirectiveName().getAsString(),
3160 CodeGen, S.getBeginLoc(), Hint);
3161 }
3162 
3163 void CodeGenFunction::EmitOMPParallelForDirective(
3164 const OMPParallelForDirective &S) {
3165 // Emit directive as a combined directive that consists of two implicit
3166 // directives: 'parallel' with 'for' directive.
3167 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3168 Action.Enter(CGF);
3169 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
3170 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
3171 emitDispatchForLoopBounds);
3172 };
3173 {
3174 auto LPCRegion =
3175 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3176 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
3177 emitEmptyBoundParameters);
3178 }
3179 // Check for outer lastprivate conditional update.
3180 checkForLastprivateConditionalUpdate(*this, S);
3181 }
3182 
3183 void CodeGenFunction::EmitOMPParallelForSimdDirective(
3184 const OMPParallelForSimdDirective &S) {
3185 // Emit directive as a combined directive that consists of two implicit
3186 // directives: 'parallel' with 'for simd' directive.
3187 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3188 Action.Enter(CGF);
3189 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
3190 emitDispatchForLoopBounds);
3191 };
3192 {
3193 auto LPCRegion =
3194 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3195 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
3196 emitEmptyBoundParameters);
3197 }
3198 // Check for outer lastprivate conditional update.
3199 checkForLastprivateConditionalUpdate(*this, S);
3200 }
3201 
3202 void CodeGenFunction::EmitOMPParallelMasterDirective(
3203 const OMPParallelMasterDirective &S) {
3204 // Emit directive as a combined directive that consists of two implicit
3205 // directives: 'parallel' with 'master' directive.
3206 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3207 Action.Enter(CGF);
3208 OMPPrivateScope PrivateScope(CGF);
3209 bool Copyins = CGF.EmitOMPCopyinClause(S);
3210 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3211 if (Copyins) {
3212 // Emit implicit barrier to synchronize threads and avoid data races on
3213 // propagation of the master thread's values of threadprivate variables
3214 // to local instances of those variables in all other implicit threads.
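// (Sketch, assuming the usual libomp entry point; "loc" and "tid" are
// placeholder names.) With ForceSimpleCall=true this lowers to a plain
//   __kmpc_barrier(&loc, tid);
// call, and EmitChecks=false suppresses the cancellation checks that would
// otherwise surround it.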
3215 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 3216 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 3217 /*ForceSimpleCall=*/true); 3218 } 3219 CGF.EmitOMPPrivateClause(S, PrivateScope); 3220 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 3221 (void)PrivateScope.Privatize(); 3222 emitMaster(CGF, S); 3223 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 3224 }; 3225 { 3226 auto LPCRegion = 3227 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 3228 emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen, 3229 emitEmptyBoundParameters); 3230 emitPostUpdateForReductionClause(*this, S, 3231 [](CodeGenFunction &) { return nullptr; }); 3232 } 3233 // Check for outer lastprivate conditional update. 3234 checkForLastprivateConditionalUpdate(*this, S); 3235 } 3236 3237 void CodeGenFunction::EmitOMPParallelSectionsDirective( 3238 const OMPParallelSectionsDirective &S) { 3239 // Emit directive as a combined directive that consists of two implicit 3240 // directives: 'parallel' with 'sections' directive. 3241 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3242 Action.Enter(CGF); 3243 CGF.EmitSections(S); 3244 }; 3245 { 3246 auto LPCRegion = 3247 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 3248 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, 3249 emitEmptyBoundParameters); 3250 } 3251 // Check for outer lastprivate conditional update. 3252 checkForLastprivateConditionalUpdate(*this, S); 3253 } 3254 3255 void CodeGenFunction::EmitOMPTaskBasedDirective( 3256 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, 3257 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, 3258 OMPTaskDataTy &Data) { 3259 // Emit outlined function for task construct. 3260 const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion); 3261 auto I = CS->getCapturedDecl()->param_begin(); 3262 auto PartId = std::next(I); 3263 auto TaskT = std::next(I, 4); 3264 // Check if the task is final 3265 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 3266 // If the condition constant folds and can be elided, try to avoid emitting 3267 // the condition and the dead arm of the if/else. 3268 const Expr *Cond = Clause->getCondition(); 3269 bool CondConstant; 3270 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 3271 Data.Final.setInt(CondConstant); 3272 else 3273 Data.Final.setPointer(EvaluateExprAsBool(Cond)); 3274 } else { 3275 // By default the task is not final. 3276 Data.Final.setInt(/*IntVal=*/false); 3277 } 3278 // Check if the task has 'priority' clause. 3279 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 3280 const Expr *Prio = Clause->getPriority(); 3281 Data.Priority.setInt(/*IntVal=*/true); 3282 Data.Priority.setPointer(EmitScalarConversion( 3283 EmitScalarExpr(Prio), Prio->getType(), 3284 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 3285 Prio->getExprLoc())); 3286 } 3287 // The first function argument for tasks is a thread id, the second one is a 3288 // part id (0 for tied tasks, >=0 for untied task). 3289 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 3290 // Get list of private variables. 
3291 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 3292 auto IRef = C->varlist_begin(); 3293 for (const Expr *IInit : C->private_copies()) { 3294 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 3295 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 3296 Data.PrivateVars.push_back(*IRef); 3297 Data.PrivateCopies.push_back(IInit); 3298 } 3299 ++IRef; 3300 } 3301 } 3302 EmittedAsPrivate.clear(); 3303 // Get list of firstprivate variables. 3304 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 3305 auto IRef = C->varlist_begin(); 3306 auto IElemInitRef = C->inits().begin(); 3307 for (const Expr *IInit : C->private_copies()) { 3308 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 3309 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 3310 Data.FirstprivateVars.push_back(*IRef); 3311 Data.FirstprivateCopies.push_back(IInit); 3312 Data.FirstprivateInits.push_back(*IElemInitRef); 3313 } 3314 ++IRef; 3315 ++IElemInitRef; 3316 } 3317 } 3318 // Get list of lastprivate variables (for taskloops). 3319 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 3320 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 3321 auto IRef = C->varlist_begin(); 3322 auto ID = C->destination_exprs().begin(); 3323 for (const Expr *IInit : C->private_copies()) { 3324 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 3325 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 3326 Data.LastprivateVars.push_back(*IRef); 3327 Data.LastprivateCopies.push_back(IInit); 3328 } 3329 LastprivateDstsOrigs.insert( 3330 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 3331 cast<DeclRefExpr>(*IRef)}); 3332 ++IRef; 3333 ++ID; 3334 } 3335 } 3336 SmallVector<const Expr *, 4> LHSs; 3337 SmallVector<const Expr *, 4> RHSs; 3338 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 3339 auto IPriv = C->privates().begin(); 3340 auto IRed = C->reduction_ops().begin(); 3341 auto ILHS = C->lhs_exprs().begin(); 3342 auto IRHS = C->rhs_exprs().begin(); 3343 for (const Expr *Ref : C->varlists()) { 3344 Data.ReductionVars.emplace_back(Ref); 3345 Data.ReductionCopies.emplace_back(*IPriv); 3346 Data.ReductionOps.emplace_back(*IRed); 3347 LHSs.emplace_back(*ILHS); 3348 RHSs.emplace_back(*IRHS); 3349 std::advance(IPriv, 1); 3350 std::advance(IRed, 1); 3351 std::advance(ILHS, 1); 3352 std::advance(IRHS, 1); 3353 } 3354 } 3355 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( 3356 *this, S.getBeginLoc(), LHSs, RHSs, Data); 3357 // Build list of dependences. 3358 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) 3359 for (const Expr *IRef : C->varlists()) 3360 Data.Dependences.emplace_back(C->getDependencyKind(), IRef); 3361 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, 3362 CapturedRegion](CodeGenFunction &CGF, 3363 PrePostActionTy &Action) { 3364 // Set proper addresses for generated private copies. 
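// How those addresses are recovered (sketch; "copy_fn", "privates_ptr" and
// "privN_addr" are placeholder names): the outlined task function is handed
// a copy function plus a pointer to its privates block, and the call emitted
// below has the shape
//   copy_fn(privates_ptr, &priv1_addr, &priv2_addr, ...);
// where each out-parameter receives the address of one private copy, which
// is then registered in the private scope.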
3365 OMPPrivateScope Scope(CGF);
3366 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
3367 !Data.LastprivateVars.empty()) {
3368 llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
3369 CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
3370 enum { PrivatesParam = 2, CopyFnParam = 3 };
3371 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
3372 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
3373 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
3374 CS->getCapturedDecl()->getParam(PrivatesParam)));
3375 // Map privates.
3376 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
3377 llvm::SmallVector<llvm::Value *, 16> CallArgs;
3378 CallArgs.push_back(PrivatesPtr);
3379 for (const Expr *E : Data.PrivateVars) {
3380 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3381 Address PrivatePtr = CGF.CreateMemTemp(
3382 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
3383 PrivatePtrs.emplace_back(VD, PrivatePtr);
3384 CallArgs.push_back(PrivatePtr.getPointer());
3385 }
3386 for (const Expr *E : Data.FirstprivateVars) {
3387 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3388 Address PrivatePtr =
3389 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3390 ".firstpriv.ptr.addr");
3391 PrivatePtrs.emplace_back(VD, PrivatePtr);
3392 CallArgs.push_back(PrivatePtr.getPointer());
3393 }
3394 for (const Expr *E : Data.LastprivateVars) {
3395 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3396 Address PrivatePtr =
3397 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3398 ".lastpriv.ptr.addr");
3399 PrivatePtrs.emplace_back(VD, PrivatePtr);
3400 CallArgs.push_back(PrivatePtr.getPointer());
3401 }
3402 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
3403 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
3404 for (const auto &Pair : LastprivateDstsOrigs) {
3405 const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
3406 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
3407 /*RefersToEnclosingVariableOrCapture=*/
3408 CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
3409 Pair.second->getType(), VK_LValue,
3410 Pair.second->getExprLoc());
3411 Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
3412 return CGF.EmitLValue(&DRE).getAddress(CGF);
3413 });
3414 }
3415 for (const auto &Pair : PrivatePtrs) {
3416 Address Replacement(CGF.Builder.CreateLoad(Pair.second),
3417 CGF.getContext().getDeclAlign(Pair.first));
3418 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
3419 }
3420 }
3421 if (Data.Reductions) {
3422 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
3423 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
3424 Data.ReductionOps);
3425 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
3426 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
3427 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
3428 RedCG.emitSharedLValue(CGF, Cnt);
3429 RedCG.emitAggregateType(CGF, Cnt);
3430 // FIXME: This must be removed once the runtime library is fixed.
3431 // Emit required threadprivate variables for
3432 // initializer/combiner/finalizer.
3433 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
3434 RedCG, Cnt);
3435 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
3436 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
3437 Replacement =
3438 Address(CGF.EmitScalarConversion(
3439 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
3440 CGF.getContext().getPointerType(
3441 Data.ReductionCopies[Cnt]->getType()),
3442 Data.ReductionCopies[Cnt]->getExprLoc()),
3443 Replacement.getAlignment());
3444 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
3445 Scope.addPrivate(RedCG.getBaseDecl(Cnt),
3446 [Replacement]() { return Replacement; });
3447 }
3448 }
3449 // Privatize all private variables except for in_reduction items.
3450 (void)Scope.Privatize();
3451 SmallVector<const Expr *, 4> InRedVars;
3452 SmallVector<const Expr *, 4> InRedPrivs;
3453 SmallVector<const Expr *, 4> InRedOps;
3454 SmallVector<const Expr *, 4> TaskgroupDescriptors;
3455 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
3456 auto IPriv = C->privates().begin();
3457 auto IRed = C->reduction_ops().begin();
3458 auto ITD = C->taskgroup_descriptors().begin();
3459 for (const Expr *Ref : C->varlists()) {
3460 InRedVars.emplace_back(Ref);
3461 InRedPrivs.emplace_back(*IPriv);
3462 InRedOps.emplace_back(*IRed);
3463 TaskgroupDescriptors.emplace_back(*ITD);
3464 std::advance(IPriv, 1);
3465 std::advance(IRed, 1);
3466 std::advance(ITD, 1);
3467 }
3468 }
3469 // Privatize in_reduction items here, because taskgroup descriptors must be
3470 // privatized earlier.
3471 OMPPrivateScope InRedScope(CGF);
3472 if (!InRedVars.empty()) {
3473 ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
3474 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
3475 RedCG.emitSharedLValue(CGF, Cnt);
3476 RedCG.emitAggregateType(CGF, Cnt);
3477 // The taskgroup descriptor variable is always implicit firstprivate and
3478 // privatized already during processing of the firstprivates.
3479 // FIXME: This must be removed once the runtime library is fixed.
3480 // Emit required threadprivate variables for
3481 // initializer/combiner/finalizer.
3482 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), 3483 RedCG, Cnt); 3484 llvm::Value *ReductionsPtr = 3485 CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]), 3486 TaskgroupDescriptors[Cnt]->getExprLoc()); 3487 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( 3488 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); 3489 Replacement = Address( 3490 CGF.EmitScalarConversion( 3491 Replacement.getPointer(), CGF.getContext().VoidPtrTy, 3492 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), 3493 InRedPrivs[Cnt]->getExprLoc()), 3494 Replacement.getAlignment()); 3495 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); 3496 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), 3497 [Replacement]() { return Replacement; }); 3498 } 3499 } 3500 (void)InRedScope.Privatize(); 3501 3502 Action.Enter(CGF); 3503 BodyGen(CGF); 3504 }; 3505 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 3506 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, 3507 Data.NumberOfParts); 3508 OMPLexicalScope Scope(*this, S, llvm::None, 3509 !isOpenMPParallelDirective(S.getDirectiveKind()) && 3510 !isOpenMPSimdDirective(S.getDirectiveKind())); 3511 TaskGen(*this, OutlinedFn, Data); 3512 } 3513 3514 static ImplicitParamDecl * 3515 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, 3516 QualType Ty, CapturedDecl *CD, 3517 SourceLocation Loc) { 3518 auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, 3519 ImplicitParamDecl::Other); 3520 auto *OrigRef = DeclRefExpr::Create( 3521 C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, 3522 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); 3523 auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, 3524 ImplicitParamDecl::Other); 3525 auto *PrivateRef = DeclRefExpr::Create( 3526 C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD, 3527 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); 3528 QualType ElemType = C.getBaseElementType(Ty); 3529 auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType, 3530 ImplicitParamDecl::Other); 3531 auto *InitRef = DeclRefExpr::Create( 3532 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, 3533 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); 3534 PrivateVD->setInitStyle(VarDecl::CInit); 3535 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, 3536 InitRef, /*BasePath=*/nullptr, 3537 VK_RValue)); 3538 Data.FirstprivateVars.emplace_back(OrigRef); 3539 Data.FirstprivateCopies.emplace_back(PrivateRef); 3540 Data.FirstprivateInits.emplace_back(InitRef); 3541 return OrigVD; 3542 } 3543 3544 void CodeGenFunction::EmitOMPTargetTaskBasedDirective( 3545 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, 3546 OMPTargetDataInfo &InputInfo) { 3547 // Emit outlined function for task construct. 3548 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); 3549 Address CapturedStruct = GenerateCapturedStmtArgument(*CS); 3550 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 3551 auto I = CS->getCapturedDecl()->param_begin(); 3552 auto PartId = std::next(I); 3553 auto TaskT = std::next(I, 4); 3554 OMPTaskDataTy Data; 3555 // The task is not final. 3556 Data.Final.setInt(/*IntVal=*/false); 3557 // Get list of firstprivate variables. 
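// For target task-based directives (e.g. 'target' with 'nowait') only
// firstprivates are gathered here; the offloading base-pointer, pointer and
// size arrays are injected below as additional implicit firstprivate arrays
// so that the task owns stable copies of them.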
3558 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 3559 auto IRef = C->varlist_begin(); 3560 auto IElemInitRef = C->inits().begin(); 3561 for (auto *IInit : C->private_copies()) { 3562 Data.FirstprivateVars.push_back(*IRef); 3563 Data.FirstprivateCopies.push_back(IInit); 3564 Data.FirstprivateInits.push_back(*IElemInitRef); 3565 ++IRef; 3566 ++IElemInitRef; 3567 } 3568 } 3569 OMPPrivateScope TargetScope(*this); 3570 VarDecl *BPVD = nullptr; 3571 VarDecl *PVD = nullptr; 3572 VarDecl *SVD = nullptr; 3573 if (InputInfo.NumberOfTargetItems > 0) { 3574 auto *CD = CapturedDecl::Create( 3575 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); 3576 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); 3577 QualType BaseAndPointersType = getContext().getConstantArrayType( 3578 getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal, 3579 /*IndexTypeQuals=*/0); 3580 BPVD = createImplicitFirstprivateForType( 3581 getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc()); 3582 PVD = createImplicitFirstprivateForType( 3583 getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc()); 3584 QualType SizesType = getContext().getConstantArrayType( 3585 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), 3586 ArrSize, nullptr, ArrayType::Normal, 3587 /*IndexTypeQuals=*/0); 3588 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, 3589 S.getBeginLoc()); 3590 TargetScope.addPrivate( 3591 BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; }); 3592 TargetScope.addPrivate(PVD, 3593 [&InputInfo]() { return InputInfo.PointersArray; }); 3594 TargetScope.addPrivate(SVD, 3595 [&InputInfo]() { return InputInfo.SizesArray; }); 3596 } 3597 (void)TargetScope.Privatize(); 3598 // Build list of dependences. 3599 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) 3600 for (const Expr *IRef : C->varlists()) 3601 Data.Dependences.emplace_back(C->getDependencyKind(), IRef); 3602 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, 3603 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { 3604 // Set proper addresses for generated private copies. 3605 OMPPrivateScope Scope(CGF); 3606 if (!Data.FirstprivateVars.empty()) { 3607 llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( 3608 CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); 3609 enum { PrivatesParam = 2, CopyFnParam = 3 }; 3610 llvm::Value *CopyFn = CGF.Builder.CreateLoad( 3611 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); 3612 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( 3613 CS->getCapturedDecl()->getParam(PrivatesParam))); 3614 // Map privates. 
3615 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 3616 llvm::SmallVector<llvm::Value *, 16> CallArgs; 3617 CallArgs.push_back(PrivatesPtr); 3618 for (const Expr *E : Data.FirstprivateVars) { 3619 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3620 Address PrivatePtr = 3621 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 3622 ".firstpriv.ptr.addr"); 3623 PrivatePtrs.emplace_back(VD, PrivatePtr); 3624 CallArgs.push_back(PrivatePtr.getPointer()); 3625 } 3626 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( 3627 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); 3628 for (const auto &Pair : PrivatePtrs) { 3629 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 3630 CGF.getContext().getDeclAlign(Pair.first)); 3631 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 3632 } 3633 } 3634 // Privatize all private variables except for in_reduction items. 3635 (void)Scope.Privatize(); 3636 if (InputInfo.NumberOfTargetItems > 0) { 3637 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( 3638 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0); 3639 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( 3640 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0); 3641 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( 3642 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0); 3643 } 3644 3645 Action.Enter(CGF); 3646 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); 3647 BodyGen(CGF); 3648 }; 3649 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 3650 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, 3651 Data.NumberOfParts); 3652 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0); 3653 IntegerLiteral IfCond(getContext(), TrueOrFalse, 3654 getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 3655 SourceLocation()); 3656 3657 CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn, 3658 SharedsTy, CapturedStruct, &IfCond, Data); 3659 } 3660 3661 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 3662 // Emit outlined function for task construct. 3663 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); 3664 Address CapturedStruct = GenerateCapturedStmtArgument(*CS); 3665 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 3666 const Expr *IfCond = nullptr; 3667 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3668 if (C->getNameModifier() == OMPD_unknown || 3669 C->getNameModifier() == OMPD_task) { 3670 IfCond = C->getCondition(); 3671 break; 3672 } 3673 } 3674 3675 OMPTaskDataTy Data; 3676 // Check if we should emit tied or untied task. 
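// A task is tied unless an 'untied' clause is present. For illustration, the
// overall lowering of '#pragma omp task' is roughly (a sketch of the libomp
// calls with placeholder names; the real sequence is built by
// CGOpenMPRuntime::emitTaskCall):
//   kmp_task_t *t = __kmpc_omp_task_alloc(&loc, tid, flags, sizeof_task,
//                                         sizeof_shareds, task_entry);
//   ...copy shareds and firstprivates into *t...
//   __kmpc_omp_task(&loc, tid, t);
// where 'flags' encodes, among other things, the tied-ness computed here.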
3677 Data.Tied = !S.getSingleClause<OMPUntiedClause>(); 3678 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 3679 CGF.EmitStmt(CS->getCapturedStmt()); 3680 }; 3681 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 3682 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, 3683 const OMPTaskDataTy &Data) { 3684 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn, 3685 SharedsTy, CapturedStruct, IfCond, 3686 Data); 3687 }; 3688 auto LPCRegion = 3689 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 3690 EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data); 3691 } 3692 3693 void CodeGenFunction::EmitOMPTaskyieldDirective( 3694 const OMPTaskyieldDirective &S) { 3695 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc()); 3696 } 3697 3698 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 3699 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier); 3700 } 3701 3702 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 3703 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc()); 3704 } 3705 3706 void CodeGenFunction::EmitOMPTaskgroupDirective( 3707 const OMPTaskgroupDirective &S) { 3708 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3709 Action.Enter(CGF); 3710 if (const Expr *E = S.getReductionRef()) { 3711 SmallVector<const Expr *, 4> LHSs; 3712 SmallVector<const Expr *, 4> RHSs; 3713 OMPTaskDataTy Data; 3714 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { 3715 auto IPriv = C->privates().begin(); 3716 auto IRed = C->reduction_ops().begin(); 3717 auto ILHS = C->lhs_exprs().begin(); 3718 auto IRHS = C->rhs_exprs().begin(); 3719 for (const Expr *Ref : C->varlists()) { 3720 Data.ReductionVars.emplace_back(Ref); 3721 Data.ReductionCopies.emplace_back(*IPriv); 3722 Data.ReductionOps.emplace_back(*IRed); 3723 LHSs.emplace_back(*ILHS); 3724 RHSs.emplace_back(*IRHS); 3725 std::advance(IPriv, 1); 3726 std::advance(IRed, 1); 3727 std::advance(ILHS, 1); 3728 std::advance(IRHS, 1); 3729 } 3730 } 3731 llvm::Value *ReductionDesc = 3732 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(), 3733 LHSs, RHSs, Data); 3734 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3735 CGF.EmitVarDecl(*VD); 3736 CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD), 3737 /*Volatile=*/false, E->getType()); 3738 } 3739 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 3740 }; 3741 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3742 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc()); 3743 } 3744 3745 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 3746 CGM.getOpenMPRuntime().emitFlush( 3747 *this, 3748 [&S]() -> ArrayRef<const Expr *> { 3749 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) 3750 return llvm::makeArrayRef(FlushClause->varlist_begin(), 3751 FlushClause->varlist_end()); 3752 return llvm::None; 3753 }(), 3754 S.getBeginLoc()); 3755 } 3756 3757 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, 3758 const CodeGenLoopTy &CodeGenLoop, 3759 Expr *IncExpr) { 3760 // Emit the loop iteration variable. 3761 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 3762 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl()); 3763 EmitVarDecl(*IVDecl); 3764 3765 // Emit the iterations count variable. 
3766 // If it is not a variable, Sema decided to calculate iterations count on each 3767 // iteration (e.g., it is foldable into a constant). 3768 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 3769 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 3770 // Emit calculation of the iterations count. 3771 EmitIgnoredExpr(S.getCalcLastIteration()); 3772 } 3773 3774 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 3775 3776 bool HasLastprivateClause = false; 3777 // Check pre-condition. 3778 { 3779 OMPLoopScope PreInitScope(*this, S); 3780 // Skip the entire loop if we don't meet the precondition. 3781 // If the condition constant folds and can be elided, avoid emitting the 3782 // whole loop. 3783 bool CondConstant; 3784 llvm::BasicBlock *ContBlock = nullptr; 3785 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 3786 if (!CondConstant) 3787 return; 3788 } else { 3789 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then"); 3790 ContBlock = createBasicBlock("omp.precond.end"); 3791 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 3792 getProfileCount(&S)); 3793 EmitBlock(ThenBlock); 3794 incrementProfileCounter(&S); 3795 } 3796 3797 emitAlignedClause(*this, S); 3798 // Emit 'then' code. 3799 { 3800 // Emit helper vars inits. 3801 3802 LValue LB = EmitOMPHelperVar( 3803 *this, cast<DeclRefExpr>( 3804 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3805 ? S.getCombinedLowerBoundVariable() 3806 : S.getLowerBoundVariable()))); 3807 LValue UB = EmitOMPHelperVar( 3808 *this, cast<DeclRefExpr>( 3809 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3810 ? S.getCombinedUpperBoundVariable() 3811 : S.getUpperBoundVariable()))); 3812 LValue ST = 3813 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 3814 LValue IL = 3815 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 3816 3817 OMPPrivateScope LoopScope(*this); 3818 if (EmitOMPFirstprivateClause(S, LoopScope)) { 3819 // Emit implicit barrier to synchronize threads and avoid data races 3820 // on initialization of firstprivate variables and post-update of 3821 // lastprivate variables. 3822 CGM.getOpenMPRuntime().emitBarrierCall( 3823 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 3824 /*ForceSimpleCall=*/true); 3825 } 3826 EmitOMPPrivateClause(S, LoopScope); 3827 if (isOpenMPSimdDirective(S.getDirectiveKind()) && 3828 !isOpenMPParallelDirective(S.getDirectiveKind()) && 3829 !isOpenMPTeamsDirective(S.getDirectiveKind())) 3830 EmitOMPReductionClauseInit(S, LoopScope); 3831 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 3832 EmitOMPPrivateLoopCounters(S, LoopScope); 3833 (void)LoopScope.Privatize(); 3834 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 3835 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); 3836 3837 // Detect the distribute schedule kind and chunk. 3838 llvm::Value *Chunk = nullptr; 3839 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; 3840 if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) { 3841 ScheduleKind = C->getDistScheduleKind(); 3842 if (const Expr *Ch = C->getChunkSize()) { 3843 Chunk = EmitScalarExpr(Ch); 3844 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 3845 S.getIterationVariable()->getType(), 3846 S.getBeginLoc()); 3847 } 3848 } else { 3849 // Default behaviour for dist_schedule clause. 
3850 CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk( 3851 *this, S, ScheduleKind, Chunk); 3852 } 3853 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 3854 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 3855 3856 // OpenMP [2.10.8, distribute Construct, Description] 3857 // If dist_schedule is specified, kind must be static. If specified, 3858 // iterations are divided into chunks of size chunk_size, chunks are 3859 // assigned to the teams of the league in a round-robin fashion in the 3860 // order of the team number. When no chunk_size is specified, the 3861 // iteration space is divided into chunks that are approximately equal 3862 // in size, and at most one chunk is distributed to each team of the 3863 // league. The size of the chunks is unspecified in this case. 3864 bool StaticChunked = RT.isStaticChunked( 3865 ScheduleKind, /* Chunked */ Chunk != nullptr) && 3866 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); 3867 if (RT.isStaticNonchunked(ScheduleKind, 3868 /* Chunked */ Chunk != nullptr) || 3869 StaticChunked) { 3870 CGOpenMPRuntime::StaticRTInput StaticInit( 3871 IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this), 3872 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), 3873 StaticChunked ? Chunk : nullptr); 3874 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, 3875 StaticInit); 3876 JumpDest LoopExit = 3877 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 3878 // UB = min(UB, GlobalUB); 3879 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3880 ? S.getCombinedEnsureUpperBound() 3881 : S.getEnsureUpperBound()); 3882 // IV = LB; 3883 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3884 ? S.getCombinedInit() 3885 : S.getInit()); 3886 3887 const Expr *Cond = 3888 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3889 ? S.getCombinedCond() 3890 : S.getCond(); 3891 3892 if (StaticChunked) 3893 Cond = S.getCombinedDistCond(); 3894 3895 // For static unchunked schedules generate: 3896 // 3897 // 1. For distribute alone, codegen 3898 // while (idx <= UB) { 3899 // BODY; 3900 // ++idx; 3901 // } 3902 // 3903 // 2. When combined with 'for' (e.g. as in 'distribute parallel for') 3904 // while (idx <= UB) { 3905 // <CodeGen rest of pragma>(LB, UB); 3906 // idx += ST; 3907 // } 3908 // 3909 // For static chunk one schedule generate: 3910 // 3911 // while (IV <= GlobalUB) { 3912 // <CodeGen rest of pragma>(LB, UB); 3913 // LB += ST; 3914 // UB += ST; 3915 // UB = min(UB, GlobalUB); 3916 // IV = LB; 3917 // } 3918 // 3919 emitCommonSimdLoop( 3920 *this, S, 3921 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3922 if (isOpenMPSimdDirective(S.getDirectiveKind())) 3923 CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); 3924 }, 3925 [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop, 3926 StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) { 3927 CGF.EmitOMPInnerLoop( 3928 S, LoopScope.requiresCleanups(), Cond, IncExpr, 3929 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 3930 CodeGenLoop(CGF, S, LoopExit); 3931 }, 3932 [&S, StaticChunked](CodeGenFunction &CGF) { 3933 if (StaticChunked) { 3934 CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound()); 3935 CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound()); 3936 CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound()); 3937 CGF.EmitIgnoredExpr(S.getCombinedInit()); 3938 } 3939 }); 3940 }); 3941 EmitBlock(LoopExit.getBlock()); 3942 // Tell the runtime we are done. 
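// As in the worksharing case, this lowers to a __kmpc_for_static_fini(&loc,
// tid) style call (sketch); the directive kind is only used to tag the
// location info with the matching work kind (loop, sections or distribute).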
3943 RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind()); 3944 } else { 3945 // Emit the outer loop, which requests its work chunk [LB..UB] from 3946 // runtime and runs the inner loop to process it. 3947 const OMPLoopArguments LoopArguments = { 3948 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), 3949 IL.getAddress(*this), Chunk}; 3950 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, 3951 CodeGenLoop); 3952 } 3953 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 3954 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { 3955 return CGF.Builder.CreateIsNotNull( 3956 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 3957 }); 3958 } 3959 if (isOpenMPSimdDirective(S.getDirectiveKind()) && 3960 !isOpenMPParallelDirective(S.getDirectiveKind()) && 3961 !isOpenMPTeamsDirective(S.getDirectiveKind())) { 3962 EmitOMPReductionClauseFinal(S, OMPD_simd); 3963 // Emit post-update of the reduction variables if IsLastIter != 0. 3964 emitPostUpdateForReductionClause( 3965 *this, S, [IL, &S](CodeGenFunction &CGF) { 3966 return CGF.Builder.CreateIsNotNull( 3967 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 3968 }); 3969 } 3970 // Emit final copy of the lastprivate variables if IsLastIter != 0. 3971 if (HasLastprivateClause) { 3972 EmitOMPLastprivateClauseFinal( 3973 S, /*NoFinals=*/false, 3974 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); 3975 } 3976 } 3977 3978 // We're now done with the loop, so jump to the continuation block. 3979 if (ContBlock) { 3980 EmitBranch(ContBlock); 3981 EmitBlock(ContBlock, true); 3982 } 3983 } 3984 } 3985 3986 void CodeGenFunction::EmitOMPDistributeDirective( 3987 const OMPDistributeDirective &S) { 3988 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3989 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 3990 }; 3991 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3992 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); 3993 } 3994 3995 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 3996 const CapturedStmt *S, 3997 SourceLocation Loc) { 3998 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 3999 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 4000 CGF.CapturedStmtInfo = &CapStmtInfo; 4001 llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc); 4002 Fn->setDoesNotRecurse(); 4003 return Fn; 4004 } 4005 4006 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 4007 if (S.hasClausesOfKind<OMPDependClause>()) { 4008 assert(!S.getAssociatedStmt() && 4009 "No associated statement must be in ordered depend construct."); 4010 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) 4011 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); 4012 return; 4013 } 4014 const auto *C = S.getSingleClause<OMPSIMDClause>(); 4015 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, 4016 PrePostActionTy &Action) { 4017 const CapturedStmt *CS = S.getInnermostCapturedStmt(); 4018 if (C) { 4019 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 4020 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 4021 llvm::Function *OutlinedFn = 4022 emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc()); 4023 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(), 4024 OutlinedFn, CapturedVars); 4025 } else { 4026 Action.Enter(CGF); 4027 CGF.EmitStmt(CS->getCapturedStmt()); 4028 } 4029 }; 4030 OMPLexicalScope Scope(*this, S, OMPD_unknown); 4031 CGM.getOpenMPRuntime().emitOrderedRegion(*this, 
CodeGen, S.getBeginLoc(), !C); 4032 } 4033 4034 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 4035 QualType SrcType, QualType DestType, 4036 SourceLocation Loc) { 4037 assert(CGF.hasScalarEvaluationKind(DestType) && 4038 "DestType must have scalar evaluation kind."); 4039 assert(!Val.isAggregate() && "Must be a scalar or complex."); 4040 return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 4041 DestType, Loc) 4042 : CGF.EmitComplexToScalarConversion( 4043 Val.getComplexVal(), SrcType, DestType, Loc); 4044 } 4045 4046 static CodeGenFunction::ComplexPairTy 4047 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 4048 QualType DestType, SourceLocation Loc) { 4049 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 4050 "DestType must have complex evaluation kind."); 4051 CodeGenFunction::ComplexPairTy ComplexVal; 4052 if (Val.isScalar()) { 4053 // Convert the input element to the element type of the complex. 4054 QualType DestElementType = 4055 DestType->castAs<ComplexType>()->getElementType(); 4056 llvm::Value *ScalarVal = CGF.EmitScalarConversion( 4057 Val.getScalarVal(), SrcType, DestElementType, Loc); 4058 ComplexVal = CodeGenFunction::ComplexPairTy( 4059 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 4060 } else { 4061 assert(Val.isComplex() && "Must be a scalar or complex."); 4062 QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 4063 QualType DestElementType = 4064 DestType->castAs<ComplexType>()->getElementType(); 4065 ComplexVal.first = CGF.EmitScalarConversion( 4066 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 4067 ComplexVal.second = CGF.EmitScalarConversion( 4068 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 4069 } 4070 return ComplexVal; 4071 } 4072 4073 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 4074 LValue LVal, RValue RVal) { 4075 if (LVal.isGlobalReg()) { 4076 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 4077 } else { 4078 CGF.EmitAtomicStore(RVal, LVal, 4079 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 4080 : llvm::AtomicOrdering::Monotonic, 4081 LVal.isVolatile(), /*isInit=*/false); 4082 } 4083 } 4084 4085 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, 4086 QualType RValTy, SourceLocation Loc) { 4087 switch (getEvaluationKind(LVal.getType())) { 4088 case TEK_Scalar: 4089 EmitStoreThroughLValue(RValue::get(convertToScalarValue( 4090 *this, RVal, RValTy, LVal.getType(), Loc)), 4091 LVal); 4092 break; 4093 case TEK_Complex: 4094 EmitStoreOfComplex( 4095 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, 4096 /*isInit=*/false); 4097 break; 4098 case TEK_Aggregate: 4099 llvm_unreachable("Must be a scalar or complex."); 4100 } 4101 } 4102 4103 static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 4104 const Expr *X, const Expr *V, 4105 SourceLocation Loc) { 4106 // v = x; 4107 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 4108 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 4109 LValue XLValue = CGF.EmitLValue(X); 4110 LValue VLValue = CGF.EmitLValue(V); 4111 RValue Res = XLValue.isGlobalReg() 4112 ? CGF.EmitLoadOfLValue(XLValue, Loc) 4113 : CGF.EmitAtomicLoad( 4114 XLValue, Loc, 4115 IsSeqCst ? 
llvm::AtomicOrdering::SequentiallyConsistent 4116 : llvm::AtomicOrdering::Monotonic, 4117 XLValue.isVolatile()); 4118 // OpenMP, 2.12.6, atomic Construct 4119 // Any atomic construct with a seq_cst clause forces the atomically 4120 // performed operation to include an implicit flush operation without a 4121 // list. 4122 if (IsSeqCst) 4123 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 4124 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); 4125 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V); 4126 } 4127 4128 static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 4129 const Expr *X, const Expr *E, 4130 SourceLocation Loc) { 4131 // x = expr; 4132 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 4133 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); 4134 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X); 4135 // OpenMP, 2.12.6, atomic Construct 4136 // Any atomic construct with a seq_cst clause forces the atomically 4137 // performed operation to include an implicit flush operation without a 4138 // list. 4139 if (IsSeqCst) 4140 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 4141 } 4142 4143 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, 4144 RValue Update, 4145 BinaryOperatorKind BO, 4146 llvm::AtomicOrdering AO, 4147 bool IsXLHSInRHSPart) { 4148 ASTContext &Context = CGF.getContext(); 4149 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' 4150 // expression is simple and atomic is allowed for the given type for the 4151 // target platform. 4152 if (BO == BO_Comma || !Update.isScalar() || 4153 !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() || 4154 (!isa<llvm::ConstantInt>(Update.getScalarVal()) && 4155 (Update.getScalarVal()->getType() != 4156 X.getAddress(CGF).getElementType())) || 4157 !X.getAddress(CGF).getElementType()->isIntegerTy() || 4158 !Context.getTargetInfo().hasBuiltinAtomic( 4159 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) 4160 return std::make_pair(false, RValue::get(nullptr)); 4161 4162 llvm::AtomicRMWInst::BinOp RMWOp; 4163 switch (BO) { 4164 case BO_Add: 4165 RMWOp = llvm::AtomicRMWInst::Add; 4166 break; 4167 case BO_Sub: 4168 if (!IsXLHSInRHSPart) 4169 return std::make_pair(false, RValue::get(nullptr)); 4170 RMWOp = llvm::AtomicRMWInst::Sub; 4171 break; 4172 case BO_And: 4173 RMWOp = llvm::AtomicRMWInst::And; 4174 break; 4175 case BO_Or: 4176 RMWOp = llvm::AtomicRMWInst::Or; 4177 break; 4178 case BO_Xor: 4179 RMWOp = llvm::AtomicRMWInst::Xor; 4180 break; 4181 case BO_LT: 4182 RMWOp = X.getType()->hasSignedIntegerRepresentation() 4183 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min 4184 : llvm::AtomicRMWInst::Max) 4185 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin 4186 : llvm::AtomicRMWInst::UMax); 4187 break; 4188 case BO_GT: 4189 RMWOp = X.getType()->hasSignedIntegerRepresentation() 4190 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max 4191 : llvm::AtomicRMWInst::Min) 4192 : (IsXLHSInRHSPart ? 
llvm::AtomicRMWInst::UMax
4193                                    : llvm::AtomicRMWInst::UMin);
4194     break;
4195   case BO_Assign:
4196     RMWOp = llvm::AtomicRMWInst::Xchg;
4197     break;
4198   case BO_Mul:
4199   case BO_Div:
4200   case BO_Rem:
4201   case BO_Shl:
4202   case BO_Shr:
4203   case BO_LAnd:
4204   case BO_LOr:
4205     return std::make_pair(false, RValue::get(nullptr));
4206   case BO_PtrMemD:
4207   case BO_PtrMemI:
4208   case BO_LE:
4209   case BO_GE:
4210   case BO_EQ:
4211   case BO_NE:
4212   case BO_Cmp:
4213   case BO_AddAssign:
4214   case BO_SubAssign:
4215   case BO_AndAssign:
4216   case BO_OrAssign:
4217   case BO_XorAssign:
4218   case BO_MulAssign:
4219   case BO_DivAssign:
4220   case BO_RemAssign:
4221   case BO_ShlAssign:
4222   case BO_ShrAssign:
4223   case BO_Comma:
4224     llvm_unreachable("Unsupported atomic update operation");
4225   }
4226   llvm::Value *UpdateVal = Update.getScalarVal();
4227   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
4228     UpdateVal = CGF.Builder.CreateIntCast(
4229         IC, X.getAddress(CGF).getElementType(),
4230         X.getType()->hasSignedIntegerRepresentation());
4231   }
4232   llvm::Value *Res =
4233       CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
4234   return std::make_pair(true, RValue::get(Res));
4235 }
4236
4237 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
4238     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
4239     llvm::AtomicOrdering AO, SourceLocation Loc,
4240     const llvm::function_ref<RValue(RValue)> CommonGen) {
4241   // Update expressions are allowed to have the following forms:
4242   // x binop= expr; -> xrval binop expr;
4243   // x++, ++x -> xrval + 1;
4244   // x--, --x -> xrval - 1;
4245   // x = x binop expr; -> xrval binop expr
4246   // x = expr Op x; -> expr binop xrval;
4247   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
4248   if (!Res.first) {
4249     if (X.isGlobalReg()) {
4250       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
4251       // 'xrval'.
4252       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
4253     } else {
4254       // Perform compare-and-swap procedure.
4255       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
4256     }
4257   }
4258   return Res;
4259 }
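// For example, given
//   #pragma omp atomic update
//   x = expr + x;
// 'x' is the RHS operand of the update expression (IsXLHSInRHSPart is
// false); since '+' commutes, emitOMPAtomicRMW above can still select
// 'atomicrmw add', whereas a non-commutative '-' in that position falls
// back to the compare-and-swap path.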
4260
4261 static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
4262                                     const Expr *X, const Expr *E,
4263                                     const Expr *UE, bool IsXLHSInRHSPart,
4264                                     SourceLocation Loc) {
4265   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
4266          "Update expr in 'atomic update' must be a binary operator.");
4267   const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
4268   // Update expressions are allowed to have the following forms:
4269   // x binop= expr; -> xrval binop expr;
4270   // x++, ++x -> xrval + 1;
4271   // x--, --x -> xrval - 1;
4272   // x = x binop expr; -> xrval binop expr
4273   // x = expr Op x; -> expr binop xrval;
4274   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
4275   LValue XLValue = CGF.EmitLValue(X);
4276   RValue ExprRValue = CGF.EmitAnyExpr(E);
4277   llvm::AtomicOrdering AO = IsSeqCst
4278                                 ? llvm::AtomicOrdering::SequentiallyConsistent
4279                                 : llvm::AtomicOrdering::Monotonic;
4280   const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
4281   const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
4282   const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
4283   const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
4284   auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
4285     CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
4286     CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
4287     return CGF.EmitAnyExpr(UE);
4288   };
4289   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
4290       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
4291   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
4292   // OpenMP, 2.12.6, atomic Construct
4293   // Any atomic construct with a seq_cst clause forces the atomically
4294   // performed operation to include an implicit flush operation without a
4295   // list.
4296   if (IsSeqCst)
4297     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
4298 }
4299
4300 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
4301                             QualType SourceType, QualType ResType,
4302                             SourceLocation Loc) {
4303   switch (CGF.getEvaluationKind(ResType)) {
4304   case TEK_Scalar:
4305     return RValue::get(
4306         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
4307   case TEK_Complex: {
4308     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
4309     return RValue::getComplex(Res.first, Res.second);
4310   }
4311   case TEK_Aggregate:
4312     break;
4313   }
4314   llvm_unreachable("Must be a scalar or complex.");
4315 }
4316
4317 static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
4318                                      bool IsPostfixUpdate, const Expr *V,
4319                                      const Expr *X, const Expr *E,
4320                                      const Expr *UE, bool IsXLHSInRHSPart,
4321                                      SourceLocation Loc) {
4322   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
4323   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
4324   RValue NewVVal;
4325   LValue VLValue = CGF.EmitLValue(V);
4326   LValue XLValue = CGF.EmitLValue(X);
4327   RValue ExprRValue = CGF.EmitAnyExpr(E);
4328   llvm::AtomicOrdering AO = IsSeqCst
4329                                 ? llvm::AtomicOrdering::SequentiallyConsistent
4330                                 : llvm::AtomicOrdering::Monotonic;
4331   QualType NewVValType;
4332   if (UE) {
4333     // 'x' is updated with some additional value.
4334     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
4335            "Update expr in 'atomic capture' must be a binary operator.");
4336     const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
4337     // Update expressions are allowed to have the following forms:
4338     // x binop= expr; -> xrval binop expr;
4339     // x++, ++x -> xrval + 1;
4340     // x--, --x -> xrval - 1;
4341     // x = x binop expr; -> xrval binop expr
4342     // x = expr Op x; -> expr binop xrval;
4343     const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
4344     const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
4345     const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
4346     NewVValType = XRValExpr->getType();
4347     const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
4348     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
4349                   IsPostfixUpdate](RValue XRValue) {
4350       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
4351       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
4352       RValue Res = CGF.EmitAnyExpr(UE);
4353       NewVVal = IsPostfixUpdate ? XRValue : Res;
4354       return Res;
4355     };
4356     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
4357         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
4358     CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
4359     if (Res.first) {
4360       // 'atomicrmw' instruction was generated.
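      // For example, for
      //   #pragma omp atomic capture
      //   { v = x; x += expr; }
      // 'atomicrmw add' returns the old value of 'x', which is exactly what
      // 'v' must capture; for the { x += expr; v = x; } form the update
      // expression is replayed below on the returned value to recover the
      // new one.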
4361 if (IsPostfixUpdate) { 4362 // Use old value from 'atomicrmw'. 4363 NewVVal = Res.second; 4364 } else { 4365 // 'atomicrmw' does not provide new value, so evaluate it using old 4366 // value of 'x'. 4367 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 4368 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 4369 NewVVal = CGF.EmitAnyExpr(UE); 4370 } 4371 } 4372 } else { 4373 // 'x' is simply rewritten with some 'expr'. 4374 NewVValType = X->getType().getNonReferenceType(); 4375 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 4376 X->getType().getNonReferenceType(), Loc); 4377 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) { 4378 NewVVal = XRValue; 4379 return ExprRValue; 4380 }; 4381 // Try to perform atomicrmw xchg, otherwise simple exchange. 4382 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 4383 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 4384 Loc, Gen); 4385 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X); 4386 if (Res.first) { 4387 // 'atomicrmw' instruction was generated. 4388 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 4389 } 4390 } 4391 // Emit post-update store to 'v' of old/new 'x' value. 4392 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); 4393 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V); 4394 // OpenMP, 2.12.6, atomic Construct 4395 // Any atomic construct with a seq_cst clause forces the atomically 4396 // performed operation to include an implicit flush operation without a 4397 // list. 4398 if (IsSeqCst) 4399 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 4400 } 4401 4402 static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 4403 bool IsSeqCst, bool IsPostfixUpdate, 4404 const Expr *X, const Expr *V, const Expr *E, 4405 const Expr *UE, bool IsXLHSInRHSPart, 4406 SourceLocation Loc) { 4407 switch (Kind) { 4408 case OMPC_read: 4409 emitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 4410 break; 4411 case OMPC_write: 4412 emitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 4413 break; 4414 case OMPC_unknown: 4415 case OMPC_update: 4416 emitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 4417 break; 4418 case OMPC_capture: 4419 emitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 4420 IsXLHSInRHSPart, Loc); 4421 break; 4422 case OMPC_if: 4423 case OMPC_final: 4424 case OMPC_num_threads: 4425 case OMPC_private: 4426 case OMPC_firstprivate: 4427 case OMPC_lastprivate: 4428 case OMPC_reduction: 4429 case OMPC_task_reduction: 4430 case OMPC_in_reduction: 4431 case OMPC_safelen: 4432 case OMPC_simdlen: 4433 case OMPC_allocator: 4434 case OMPC_allocate: 4435 case OMPC_collapse: 4436 case OMPC_default: 4437 case OMPC_seq_cst: 4438 case OMPC_acq_rel: 4439 case OMPC_shared: 4440 case OMPC_linear: 4441 case OMPC_aligned: 4442 case OMPC_copyin: 4443 case OMPC_copyprivate: 4444 case OMPC_flush: 4445 case OMPC_proc_bind: 4446 case OMPC_schedule: 4447 case OMPC_ordered: 4448 case OMPC_nowait: 4449 case OMPC_untied: 4450 case OMPC_threadprivate: 4451 case OMPC_depend: 4452 case OMPC_mergeable: 4453 case OMPC_device: 4454 case OMPC_threads: 4455 case OMPC_simd: 4456 case OMPC_map: 4457 case OMPC_num_teams: 4458 case OMPC_thread_limit: 4459 case OMPC_priority: 4460 case OMPC_grainsize: 4461 case OMPC_nogroup: 4462 case OMPC_num_tasks: 4463 case OMPC_hint: 4464 case OMPC_dist_schedule: 4465 case OMPC_defaultmap: 4466 case OMPC_uniform: 4467 case OMPC_to: 4468 case OMPC_from: 
4469   case OMPC_use_device_ptr:
4470   case OMPC_is_device_ptr:
4471   case OMPC_unified_address:
4472   case OMPC_unified_shared_memory:
4473   case OMPC_reverse_offload:
4474   case OMPC_dynamic_allocators:
4475   case OMPC_atomic_default_mem_order:
4476   case OMPC_device_type:
4477   case OMPC_match:
4478   case OMPC_nontemporal:
4479   case OMPC_order:
4480     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
4481   }
4482 }
4483
4484 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
4485   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
4486   OpenMPClauseKind Kind = OMPC_unknown;
4487   for (const OMPClause *C : S.clauses()) {
4488     // Find first clause (skip seq_cst|acq_rel clause, if it is first).
4489     if (C->getClauseKind() != OMPC_seq_cst &&
4490         C->getClauseKind() != OMPC_acq_rel) {
4491       Kind = C->getClauseKind();
4492       break;
4493     }
4494   }
4495
4496   const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
4497   if (const auto *FE = dyn_cast<FullExpr>(CS))
4498     enterFullExpression(FE);
4499   // Processing for statements under 'atomic capture'.
4500   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
4501     for (const Stmt *C : Compound->body()) {
4502       if (const auto *FE = dyn_cast<FullExpr>(C))
4503         enterFullExpression(FE);
4504     }
4505   }
4506
4507   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
4508                                             PrePostActionTy &) {
4509     CGF.EmitStopPoint(CS);
4510     emitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
4511                       S.getV(), S.getExpr(), S.getUpdateExpr(),
4512                       S.isXLHSInRHSPart(), S.getBeginLoc());
4513   };
4514   OMPLexicalScope Scope(*this, S, OMPD_unknown);
4515   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
4516 }
4517
4518 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
4519                                          const OMPExecutableDirective &S,
4520                                          const RegionCodeGenTy &CodeGen) {
4521   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
4522   CodeGenModule &CGM = CGF.CGM;
4523
4524   // On device emit this construct as inlined code.
4525   if (CGM.getLangOpts().OpenMPIsDevice) {
4526     OMPLexicalScope Scope(CGF, S, OMPD_target);
4527     CGM.getOpenMPRuntime().emitInlinedDirective(
4528         CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4529           CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4530         });
4531     return;
4532   }
4533
4534   auto LPCRegion =
4535       CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
4536   llvm::Function *Fn = nullptr;
4537   llvm::Constant *FnID = nullptr;
4538
4539   const Expr *IfCond = nullptr;
4540   // Check for at most one 'if' clause associated with the target region.
4541   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4542     if (C->getNameModifier() == OMPD_unknown ||
4543         C->getNameModifier() == OMPD_target) {
4544       IfCond = C->getCondition();
4545       break;
4546     }
4547   }
4548
4549   // Check if we have any device clause associated with the directive.
4550   const Expr *Device = nullptr;
4551   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4552     Device = C->getDevice();
4553
4554   // Check if we have an if clause whose conditional always evaluates to false
4555   // or if we do not have any targets specified. If so, the target region is
4556   // not an offload entry point.
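  // (E.g., '#pragma omp target if(0)', or a compilation with no
  // -fopenmp-targets triples, yields a host-only region.)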
4557 bool IsOffloadEntry = true; 4558 if (IfCond) { 4559 bool Val; 4560 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) 4561 IsOffloadEntry = false; 4562 } 4563 if (CGM.getLangOpts().OMPTargetTriples.empty()) 4564 IsOffloadEntry = false; 4565 4566 assert(CGF.CurFuncDecl && "No parent declaration for target region!"); 4567 StringRef ParentName; 4568 // In case we have Ctors/Dtors we use the complete type variant to produce 4569 // the mangling of the device outlined kernel. 4570 if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) 4571 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); 4572 else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) 4573 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); 4574 else 4575 ParentName = 4576 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl))); 4577 4578 // Emit target region as a standalone region. 4579 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, 4580 IsOffloadEntry, CodeGen); 4581 OMPLexicalScope Scope(CGF, S, OMPD_task); 4582 auto &&SizeEmitter = 4583 [IsOffloadEntry](CodeGenFunction &CGF, 4584 const OMPLoopDirective &D) -> llvm::Value * { 4585 if (IsOffloadEntry) { 4586 OMPLoopScope(CGF, D); 4587 // Emit calculation of the iterations count. 4588 llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations()); 4589 NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty, 4590 /*isSigned=*/false); 4591 return NumIterations; 4592 } 4593 return nullptr; 4594 }; 4595 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, 4596 SizeEmitter); 4597 } 4598 4599 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, 4600 PrePostActionTy &Action) { 4601 Action.Enter(CGF); 4602 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4603 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4604 CGF.EmitOMPPrivateClause(S, PrivateScope); 4605 (void)PrivateScope.Privatize(); 4606 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 4607 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 4608 4609 CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); 4610 } 4611 4612 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, 4613 StringRef ParentName, 4614 const OMPTargetDirective &S) { 4615 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4616 emitTargetRegion(CGF, S, Action); 4617 }; 4618 llvm::Function *Fn; 4619 llvm::Constant *Addr; 4620 // Emit target region as a standalone region. 
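  // For example, a region in a parent function 'foo' is outlined into a
  // kernel whose name is derived from 'foo's mangling and the region's
  // source location, so the host and device sides agree on the offload
  // entry.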
4621 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4622 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4623 assert(Fn && Addr && "Target device function emission failed."); 4624 } 4625 4626 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 4627 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4628 emitTargetRegion(CGF, S, Action); 4629 }; 4630 emitCommonOMPTargetDirective(*this, S, CodeGen); 4631 } 4632 4633 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, 4634 const OMPExecutableDirective &S, 4635 OpenMPDirectiveKind InnermostKind, 4636 const RegionCodeGenTy &CodeGen) { 4637 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); 4638 llvm::Function *OutlinedFn = 4639 CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( 4640 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 4641 4642 const auto *NT = S.getSingleClause<OMPNumTeamsClause>(); 4643 const auto *TL = S.getSingleClause<OMPThreadLimitClause>(); 4644 if (NT || TL) { 4645 const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr; 4646 const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr; 4647 4648 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, 4649 S.getBeginLoc()); 4650 } 4651 4652 OMPTeamsScope Scope(CGF, S); 4653 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 4654 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 4655 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn, 4656 CapturedVars); 4657 } 4658 4659 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { 4660 // Emit teams region as a standalone region. 4661 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4662 Action.Enter(CGF); 4663 OMPPrivateScope PrivateScope(CGF); 4664 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4665 CGF.EmitOMPPrivateClause(S, PrivateScope); 4666 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4667 (void)PrivateScope.Privatize(); 4668 CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt()); 4669 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4670 }; 4671 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); 4672 emitPostUpdateForReductionClause(*this, S, 4673 [](CodeGenFunction &) { return nullptr; }); 4674 } 4675 4676 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, 4677 const OMPTargetTeamsDirective &S) { 4678 auto *CS = S.getCapturedStmt(OMPD_teams); 4679 Action.Enter(CGF); 4680 // Emit teams region as a standalone region. 
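  // A sketch of the host lowering: the teams region becomes an outlined
  // function that is passed to a '__kmpc_fork_teams' runtime call; device
  // toolchains lower the same region differently.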
4681 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { 4682 Action.Enter(CGF); 4683 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4684 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4685 CGF.EmitOMPPrivateClause(S, PrivateScope); 4686 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4687 (void)PrivateScope.Privatize(); 4688 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 4689 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 4690 CGF.EmitStmt(CS->getCapturedStmt()); 4691 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4692 }; 4693 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); 4694 emitPostUpdateForReductionClause(CGF, S, 4695 [](CodeGenFunction &) { return nullptr; }); 4696 } 4697 4698 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 4699 CodeGenModule &CGM, StringRef ParentName, 4700 const OMPTargetTeamsDirective &S) { 4701 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4702 emitTargetTeamsRegion(CGF, Action, S); 4703 }; 4704 llvm::Function *Fn; 4705 llvm::Constant *Addr; 4706 // Emit target region as a standalone region. 4707 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4708 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4709 assert(Fn && Addr && "Target device function emission failed."); 4710 } 4711 4712 void CodeGenFunction::EmitOMPTargetTeamsDirective( 4713 const OMPTargetTeamsDirective &S) { 4714 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4715 emitTargetTeamsRegion(CGF, Action, S); 4716 }; 4717 emitCommonOMPTargetDirective(*this, S, CodeGen); 4718 } 4719 4720 static void 4721 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action, 4722 const OMPTargetTeamsDistributeDirective &S) { 4723 Action.Enter(CGF); 4724 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4725 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 4726 }; 4727 4728 // Emit teams region as a standalone region. 4729 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4730 PrePostActionTy &Action) { 4731 Action.Enter(CGF); 4732 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4733 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4734 (void)PrivateScope.Privatize(); 4735 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 4736 CodeGenDistribute); 4737 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4738 }; 4739 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen); 4740 emitPostUpdateForReductionClause(CGF, S, 4741 [](CodeGenFunction &) { return nullptr; }); 4742 } 4743 4744 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 4745 CodeGenModule &CGM, StringRef ParentName, 4746 const OMPTargetTeamsDistributeDirective &S) { 4747 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4748 emitTargetTeamsDistributeRegion(CGF, Action, S); 4749 }; 4750 llvm::Function *Fn; 4751 llvm::Constant *Addr; 4752 // Emit target region as a standalone region. 
4753 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4754 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4755 assert(Fn && Addr && "Target device function emission failed."); 4756 } 4757 4758 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( 4759 const OMPTargetTeamsDistributeDirective &S) { 4760 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4761 emitTargetTeamsDistributeRegion(CGF, Action, S); 4762 }; 4763 emitCommonOMPTargetDirective(*this, S, CodeGen); 4764 } 4765 4766 static void emitTargetTeamsDistributeSimdRegion( 4767 CodeGenFunction &CGF, PrePostActionTy &Action, 4768 const OMPTargetTeamsDistributeSimdDirective &S) { 4769 Action.Enter(CGF); 4770 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4771 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 4772 }; 4773 4774 // Emit teams region as a standalone region. 4775 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4776 PrePostActionTy &Action) { 4777 Action.Enter(CGF); 4778 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4779 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4780 (void)PrivateScope.Privatize(); 4781 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 4782 CodeGenDistribute); 4783 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4784 }; 4785 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen); 4786 emitPostUpdateForReductionClause(CGF, S, 4787 [](CodeGenFunction &) { return nullptr; }); 4788 } 4789 4790 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 4791 CodeGenModule &CGM, StringRef ParentName, 4792 const OMPTargetTeamsDistributeSimdDirective &S) { 4793 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4794 emitTargetTeamsDistributeSimdRegion(CGF, Action, S); 4795 }; 4796 llvm::Function *Fn; 4797 llvm::Constant *Addr; 4798 // Emit target region as a standalone region. 4799 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4800 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4801 assert(Fn && Addr && "Target device function emission failed."); 4802 } 4803 4804 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( 4805 const OMPTargetTeamsDistributeSimdDirective &S) { 4806 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4807 emitTargetTeamsDistributeSimdRegion(CGF, Action, S); 4808 }; 4809 emitCommonOMPTargetDirective(*this, S, CodeGen); 4810 } 4811 4812 void CodeGenFunction::EmitOMPTeamsDistributeDirective( 4813 const OMPTeamsDistributeDirective &S) { 4814 4815 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4816 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 4817 }; 4818 4819 // Emit teams region as a standalone region. 
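  // For instance, for
  //   #pragma omp teams distribute
  //   for (int i = 0; i < n; ++i) a[i] = i;
  // the inlined 'distribute' emission below partitions the n iterations
  // across the league of teams, each team obtaining its bounds from the
  // runtime.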
4820 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4821 PrePostActionTy &Action) { 4822 Action.Enter(CGF); 4823 OMPPrivateScope PrivateScope(CGF); 4824 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4825 (void)PrivateScope.Privatize(); 4826 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 4827 CodeGenDistribute); 4828 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4829 }; 4830 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); 4831 emitPostUpdateForReductionClause(*this, S, 4832 [](CodeGenFunction &) { return nullptr; }); 4833 } 4834 4835 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( 4836 const OMPTeamsDistributeSimdDirective &S) { 4837 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4838 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 4839 }; 4840 4841 // Emit teams region as a standalone region. 4842 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4843 PrePostActionTy &Action) { 4844 Action.Enter(CGF); 4845 OMPPrivateScope PrivateScope(CGF); 4846 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4847 (void)PrivateScope.Privatize(); 4848 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, 4849 CodeGenDistribute); 4850 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4851 }; 4852 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen); 4853 emitPostUpdateForReductionClause(*this, S, 4854 [](CodeGenFunction &) { return nullptr; }); 4855 } 4856 4857 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( 4858 const OMPTeamsDistributeParallelForDirective &S) { 4859 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4860 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 4861 S.getDistInc()); 4862 }; 4863 4864 // Emit teams region as a standalone region. 4865 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4866 PrePostActionTy &Action) { 4867 Action.Enter(CGF); 4868 OMPPrivateScope PrivateScope(CGF); 4869 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4870 (void)PrivateScope.Privatize(); 4871 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 4872 CodeGenDistribute); 4873 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4874 }; 4875 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); 4876 emitPostUpdateForReductionClause(*this, S, 4877 [](CodeGenFunction &) { return nullptr; }); 4878 } 4879 4880 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( 4881 const OMPTeamsDistributeParallelForSimdDirective &S) { 4882 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4883 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 4884 S.getDistInc()); 4885 }; 4886 4887 // Emit teams region as a standalone region. 
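  // The combined construct is split here: 'distribute' chunks the iteration
  // space across teams, while emitInnerParallelForWhenCombined (captured in
  // CodeGenDistribute above) emits the nested 'parallel for simd' over each
  // team's chunk.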
4888 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4889 PrePostActionTy &Action) { 4890 Action.Enter(CGF); 4891 OMPPrivateScope PrivateScope(CGF); 4892 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4893 (void)PrivateScope.Privatize(); 4894 CGF.CGM.getOpenMPRuntime().emitInlinedDirective( 4895 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); 4896 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4897 }; 4898 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd, 4899 CodeGen); 4900 emitPostUpdateForReductionClause(*this, S, 4901 [](CodeGenFunction &) { return nullptr; }); 4902 } 4903 4904 static void emitTargetTeamsDistributeParallelForRegion( 4905 CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S, 4906 PrePostActionTy &Action) { 4907 Action.Enter(CGF); 4908 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4909 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 4910 S.getDistInc()); 4911 }; 4912 4913 // Emit teams region as a standalone region. 4914 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4915 PrePostActionTy &Action) { 4916 Action.Enter(CGF); 4917 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4918 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4919 (void)PrivateScope.Privatize(); 4920 CGF.CGM.getOpenMPRuntime().emitInlinedDirective( 4921 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); 4922 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4923 }; 4924 4925 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for, 4926 CodeGenTeams); 4927 emitPostUpdateForReductionClause(CGF, S, 4928 [](CodeGenFunction &) { return nullptr; }); 4929 } 4930 4931 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 4932 CodeGenModule &CGM, StringRef ParentName, 4933 const OMPTargetTeamsDistributeParallelForDirective &S) { 4934 // Emit SPMD target teams distribute parallel for region as a standalone 4935 // region. 4936 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4937 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); 4938 }; 4939 llvm::Function *Fn; 4940 llvm::Constant *Addr; 4941 // Emit target region as a standalone region. 4942 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4943 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4944 assert(Fn && Addr && "Target device function emission failed."); 4945 } 4946 4947 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( 4948 const OMPTargetTeamsDistributeParallelForDirective &S) { 4949 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4950 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); 4951 }; 4952 emitCommonOMPTargetDirective(*this, S, CodeGen); 4953 } 4954 4955 static void emitTargetTeamsDistributeParallelForSimdRegion( 4956 CodeGenFunction &CGF, 4957 const OMPTargetTeamsDistributeParallelForSimdDirective &S, 4958 PrePostActionTy &Action) { 4959 Action.Enter(CGF); 4960 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4961 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 4962 S.getDistInc()); 4963 }; 4964 4965 // Emit teams region as a standalone region. 
4966 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4967 PrePostActionTy &Action) { 4968 Action.Enter(CGF); 4969 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4970 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4971 (void)PrivateScope.Privatize(); 4972 CGF.CGM.getOpenMPRuntime().emitInlinedDirective( 4973 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); 4974 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4975 }; 4976 4977 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd, 4978 CodeGenTeams); 4979 emitPostUpdateForReductionClause(CGF, S, 4980 [](CodeGenFunction &) { return nullptr; }); 4981 } 4982 4983 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 4984 CodeGenModule &CGM, StringRef ParentName, 4985 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 4986 // Emit SPMD target teams distribute parallel for simd region as a standalone 4987 // region. 4988 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4989 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); 4990 }; 4991 llvm::Function *Fn; 4992 llvm::Constant *Addr; 4993 // Emit target region as a standalone region. 4994 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4995 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4996 assert(Fn && Addr && "Target device function emission failed."); 4997 } 4998 4999 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( 5000 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 5001 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 5002 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); 5003 }; 5004 emitCommonOMPTargetDirective(*this, S, CodeGen); 5005 } 5006 5007 void CodeGenFunction::EmitOMPCancellationPointDirective( 5008 const OMPCancellationPointDirective &S) { 5009 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(), 5010 S.getCancelRegion()); 5011 } 5012 5013 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 5014 const Expr *IfCond = nullptr; 5015 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 5016 if (C->getNameModifier() == OMPD_unknown || 5017 C->getNameModifier() == OMPD_cancel) { 5018 IfCond = C->getCondition(); 5019 break; 5020 } 5021 } 5022 if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { 5023 // TODO: This check is necessary as we only generate `omp parallel` through 5024 // the OpenMPIRBuilder for now. 
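    // E.g., '#pragma omp cancel parallel if(c)' takes this path and lets the
    // IRBuilder emit the '__kmpc_cancel' call and its follow-up branch; all
    // other cancel regions still use the CGOpenMPRuntime path below.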
5025     if (S.getCancelRegion() == OMPD_parallel) {
5026       llvm::Value *IfCondition = nullptr;
5027       if (IfCond)
5028         IfCondition = EmitScalarExpr(IfCond,
5029                                      /*IgnoreResultAssign=*/true);
5030       return Builder.restoreIP(
5031           OMPBuilder->CreateCancel(Builder, IfCondition, S.getCancelRegion()));
5032     }
5033   }
5034
5035   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
5036                                         S.getCancelRegion());
5037 }
5038
5039 CodeGenFunction::JumpDest
5040 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
5041   if (Kind == OMPD_parallel || Kind == OMPD_task ||
5042       Kind == OMPD_target_parallel)
5043     return ReturnBlock;
5044   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
5045          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
5046          Kind == OMPD_distribute_parallel_for ||
5047          Kind == OMPD_target_parallel_for ||
5048          Kind == OMPD_teams_distribute_parallel_for ||
5049          Kind == OMPD_target_teams_distribute_parallel_for);
5050   return OMPCancelStack.getExitBlock();
5051 }
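// For example, under
//   #pragma omp target data map(to : p[0:n]) use_device_ptr(p)
// the helper below rebinds 'p' within the region to a private copy holding
// the device address the runtime reported back via CaptureDeviceAddrMap.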
5052
5053 void CodeGenFunction::EmitOMPUseDevicePtrClause(
5054     const OMPClause &NC, OMPPrivateScope &PrivateScope,
5055     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
5056   const auto &C = cast<OMPUseDevicePtrClause>(NC);
5057   auto OrigVarIt = C.varlist_begin();
5058   auto InitIt = C.inits().begin();
5059   for (const Expr *PvtVarIt : C.private_copies()) {
5060     const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
5061     const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
5062     const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
5063
5064     // In order to identify the right initializer we need to match the
5065     // declaration used by the mapping logic. In some cases we may get an
5066     // OMPCapturedExprDecl that refers to the original declaration.
5067     const ValueDecl *MatchingVD = OrigVD;
5068     if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
5069       // OMPCapturedExprDecls are used to privatize fields of the current
5070       // structure.
5071       const auto *ME = cast<MemberExpr>(OED->getInit());
5072       assert(isa<CXXThisExpr>(ME->getBase()) &&
5073              "Base should be the current struct!");
5074       MatchingVD = ME->getMemberDecl();
5075     }
5076
5077     // If we don't have information about the current list item, move on to
5078     // the next one.
5079     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
5080     if (InitAddrIt == CaptureDeviceAddrMap.end())
5081       continue;
5082
5083     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD,
5084                                                          InitAddrIt, InitVD,
5085                                                          PvtVD]() {
5086       // Initialize the temporary initialization variable with the address we
5087       // get from the runtime library. We have to cast the source address
5088       // because it is always a void *. References are materialized in the
5089       // privatization scope, so the initialization here disregards the fact
5090       // the original variable is a reference.
5091       QualType AddrQTy =
5092           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
5093       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
5094       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
5095       setAddrOfLocalVar(InitVD, InitAddr);
5096
5097       // Emit the private declaration; it will be initialized by the
5098       // declaration we just added to the local declarations map.
5099       EmitDecl(*PvtVD);
5100
5101       // The initialization variable has served its purpose in the emission
5102       // of the previous declaration, so we don't need it anymore.
5103       LocalDeclMap.erase(InitVD);
5104
5105       // Return the address of the private variable.
5106       return GetAddrOfLocalVar(PvtVD);
5107     });
5108     assert(IsRegistered && "firstprivate var already registered as private");
5109     // Silence the warning about unused variable.
5110     (void)IsRegistered;
5111
5112     ++OrigVarIt;
5113     ++InitIt;
5114   }
5115 }
5116
5117 // Generate the instructions for '#pragma omp target data' directive.
5118 void CodeGenFunction::EmitOMPTargetDataDirective(
5119     const OMPTargetDataDirective &S) {
5120   CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
5121
5122   // Create a pre/post action to signal the privatization of the device pointer.
5123   // This action can be replaced by the OpenMP runtime code generation to
5124   // deactivate privatization.
5125   bool PrivatizeDevicePointers = false;
5126   class DevicePointerPrivActionTy : public PrePostActionTy {
5127     bool &PrivatizeDevicePointers;
5128
5129   public:
5130     explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
5131         : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
5132     void Enter(CodeGenFunction &CGF) override {
5133       PrivatizeDevicePointers = true;
5134     }
5135   };
5136   DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
5137
5138   auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
5139                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5140     auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5141       CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
5142     };
5143
5144     // Codegen that selects whether to generate the privatization code or not.
5145     auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
5146                           &InnermostCodeGen](CodeGenFunction &CGF,
5147                                              PrePostActionTy &Action) {
5148       RegionCodeGenTy RCG(InnermostCodeGen);
5149       PrivatizeDevicePointers = false;
5150
5151       // Call the pre-action to change the status of PrivatizeDevicePointers if
5152       // needed.
5153       Action.Enter(CGF);
5154
5155       if (PrivatizeDevicePointers) {
5156         OMPPrivateScope PrivateScope(CGF);
5157         // Emit all instances of the use_device_ptr clause.
5158         for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
5159           CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
5160                                         Info.CaptureDeviceAddrMap);
5161         (void)PrivateScope.Privatize();
5162         RCG(CGF);
5163       } else {
5164         RCG(CGF);
5165       }
5166     };
5167
5168     // Forward the provided action to the privatization codegen.
5169     RegionCodeGenTy PrivRCG(PrivCodeGen);
5170     PrivRCG.setAction(Action);
5171
5172     // Although the body of the region is emitted as an inlined directive, we
5173     // don't use an inlined scope, because changes to the references inside
5174     // the region are expected to be visible outside, so we do not privatize
5175     // them.
5175     OMPLexicalScope Scope(CGF, S);
5176     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
5177                                                     PrivRCG);
5178   };
5179
5180   RegionCodeGenTy RCG(CodeGen);
5181
5182   // If we don't have target devices, don't bother emitting the data mapping
5183   // code.
5184   if (CGM.getLangOpts().OMPTargetTriples.empty()) {
5185     RCG(*this);
5186     return;
5187   }
5188
5189   // Check if we have any if clause associated with the directive.
5190   const Expr *IfCond = nullptr;
5191   if (const auto *C = S.getSingleClause<OMPIfClause>())
5192     IfCond = C->getCondition();
5193
5194   // Check if we have any device clause associated with the directive.
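  // (E.g., 'device(1)' routes the mapping calls below to device number 1.)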
5195 const Expr *Device = nullptr; 5196 if (const auto *C = S.getSingleClause<OMPDeviceClause>()) 5197 Device = C->getDevice(); 5198 5199 // Set the action to signal privatization of device pointers. 5200 RCG.setAction(PrivAction); 5201 5202 // Emit region code. 5203 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG, 5204 Info); 5205 } 5206 5207 void CodeGenFunction::EmitOMPTargetEnterDataDirective( 5208 const OMPTargetEnterDataDirective &S) { 5209 // If we don't have target devices, don't bother emitting the data mapping 5210 // code. 5211 if (CGM.getLangOpts().OMPTargetTriples.empty()) 5212 return; 5213 5214 // Check if we have any if clause associated with the directive. 5215 const Expr *IfCond = nullptr; 5216 if (const auto *C = S.getSingleClause<OMPIfClause>()) 5217 IfCond = C->getCondition(); 5218 5219 // Check if we have any device clause associated with the directive. 5220 const Expr *Device = nullptr; 5221 if (const auto *C = S.getSingleClause<OMPDeviceClause>()) 5222 Device = C->getDevice(); 5223 5224 OMPLexicalScope Scope(*this, S, OMPD_task); 5225 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 5226 } 5227 5228 void CodeGenFunction::EmitOMPTargetExitDataDirective( 5229 const OMPTargetExitDataDirective &S) { 5230 // If we don't have target devices, don't bother emitting the data mapping 5231 // code. 5232 if (CGM.getLangOpts().OMPTargetTriples.empty()) 5233 return; 5234 5235 // Check if we have any if clause associated with the directive. 5236 const Expr *IfCond = nullptr; 5237 if (const auto *C = S.getSingleClause<OMPIfClause>()) 5238 IfCond = C->getCondition(); 5239 5240 // Check if we have any device clause associated with the directive. 5241 const Expr *Device = nullptr; 5242 if (const auto *C = S.getSingleClause<OMPDeviceClause>()) 5243 Device = C->getDevice(); 5244 5245 OMPLexicalScope Scope(*this, S, OMPD_task); 5246 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 5247 } 5248 5249 static void emitTargetParallelRegion(CodeGenFunction &CGF, 5250 const OMPTargetParallelDirective &S, 5251 PrePostActionTy &Action) { 5252 // Get the captured statement associated with the 'parallel' region. 5253 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); 5254 Action.Enter(CGF); 5255 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { 5256 Action.Enter(CGF); 5257 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5258 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 5259 CGF.EmitOMPPrivateClause(S, PrivateScope); 5260 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 5261 (void)PrivateScope.Privatize(); 5262 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 5263 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 5264 // TODO: Add support for clauses. 
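    // The captured 'parallel' body is emitted next; e.g., under
    //   #pragma omp target parallel
    // every thread of the single spawned team executes the captured
    // statement.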
5265 CGF.EmitStmt(CS->getCapturedStmt()); 5266 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 5267 }; 5268 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, 5269 emitEmptyBoundParameters); 5270 emitPostUpdateForReductionClause(CGF, S, 5271 [](CodeGenFunction &) { return nullptr; }); 5272 } 5273 5274 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 5275 CodeGenModule &CGM, StringRef ParentName, 5276 const OMPTargetParallelDirective &S) { 5277 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 5278 emitTargetParallelRegion(CGF, S, Action); 5279 }; 5280 llvm::Function *Fn; 5281 llvm::Constant *Addr; 5282 // Emit target region as a standalone region. 5283 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 5284 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 5285 assert(Fn && Addr && "Target device function emission failed."); 5286 } 5287 5288 void CodeGenFunction::EmitOMPTargetParallelDirective( 5289 const OMPTargetParallelDirective &S) { 5290 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 5291 emitTargetParallelRegion(CGF, S, Action); 5292 }; 5293 emitCommonOMPTargetDirective(*this, S, CodeGen); 5294 } 5295 5296 static void emitTargetParallelForRegion(CodeGenFunction &CGF, 5297 const OMPTargetParallelForDirective &S, 5298 PrePostActionTy &Action) { 5299 Action.Enter(CGF); 5300 // Emit directive as a combined directive that consists of two implicit 5301 // directives: 'parallel' with 'for' directive. 5302 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 5303 Action.Enter(CGF); 5304 CodeGenFunction::OMPCancelStackRAII CancelRegion( 5305 CGF, OMPD_target_parallel_for, S.hasCancel()); 5306 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 5307 emitDispatchForLoopBounds); 5308 }; 5309 emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, 5310 emitEmptyBoundParameters); 5311 } 5312 5313 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 5314 CodeGenModule &CGM, StringRef ParentName, 5315 const OMPTargetParallelForDirective &S) { 5316 // Emit SPMD target parallel for region as a standalone region. 5317 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 5318 emitTargetParallelForRegion(CGF, S, Action); 5319 }; 5320 llvm::Function *Fn; 5321 llvm::Constant *Addr; 5322 // Emit target region as a standalone region. 5323 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 5324 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 5325 assert(Fn && Addr && "Target device function emission failed."); 5326 } 5327 5328 void CodeGenFunction::EmitOMPTargetParallelForDirective( 5329 const OMPTargetParallelForDirective &S) { 5330 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 5331 emitTargetParallelForRegion(CGF, S, Action); 5332 }; 5333 emitCommonOMPTargetDirective(*this, S, CodeGen); 5334 } 5335 5336 static void 5337 emitTargetParallelForSimdRegion(CodeGenFunction &CGF, 5338 const OMPTargetParallelForSimdDirective &S, 5339 PrePostActionTy &Action) { 5340 Action.Enter(CGF); 5341 // Emit directive as a combined directive that consists of two implicit 5342 // directives: 'parallel' with 'for' directive. 
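  // E.g., '#pragma omp target parallel for simd' reuses the worksharing-loop
  // emission below, which also attaches the simd loop metadata so the
  // vectorizer can honor the 'simd' part of the construct.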
5343   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5344     Action.Enter(CGF);
5345     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
5346                                emitDispatchForLoopBounds);
5347   };
5348   emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
5349                                  emitEmptyBoundParameters);
5350 }
5351
5352 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
5353     CodeGenModule &CGM, StringRef ParentName,
5354     const OMPTargetParallelForSimdDirective &S) {
5355   // Emit SPMD target parallel for simd region as a standalone region.
5356   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5357     emitTargetParallelForSimdRegion(CGF, S, Action);
5358   };
5359   llvm::Function *Fn;
5360   llvm::Constant *Addr;
5361   // Emit target region as a standalone region.
5362   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5363       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5364   assert(Fn && Addr && "Target device function emission failed.");
5365 }
5366
5367 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
5368     const OMPTargetParallelForSimdDirective &S) {
5369   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5370     emitTargetParallelForSimdRegion(CGF, S, Action);
5371   };
5372   emitCommonOMPTargetDirective(*this, S, CodeGen);
5373 }
5374
5375 /// Map a loop helper variable to the address of its implicit parameter.
5376 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
5377                      const ImplicitParamDecl *PVD,
5378                      CodeGenFunction::OMPPrivateScope &Privates) {
5379   const auto *VDecl = cast<VarDecl>(Helper->getDecl());
5380   Privates.addPrivate(VDecl,
5381                       [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
5382 }
5383
5384 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
5385   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
5386   // Emit outlined function for task construct.
5387   const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
5388   Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
5389   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
5390   const Expr *IfCond = nullptr;
5391   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5392     if (C->getNameModifier() == OMPD_unknown ||
5393         C->getNameModifier() == OMPD_taskloop) {
5394       IfCond = C->getCondition();
5395       break;
5396     }
5397   }
5398
5399   OMPTaskDataTy Data;
5400   // Check if taskloop must be emitted without taskgroup.
5401   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
5402   // TODO: Check if we should emit tied or untied task.
5403   Data.Tied = true;
5404   // Set scheduling for taskloop.
5405   if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
5406     // grainsize clause
5407     Data.Schedule.setInt(/*IntVal=*/false);
5408     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
5409   } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
5410     // num_tasks clause
5411     Data.Schedule.setInt(/*IntVal=*/true);
5412     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
5413   }
5414
5415   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
5416     // if (PreCond) {
5417     //   for (IV in 0..LastIteration) BODY;
5418     //   <Final counter/linear vars updates>;
5419     // }
5420     //
5421
5422     // Emit: if (PreCond) - begin.
5423     // If the condition constant folds and can be elided, avoid emitting the
5424     // whole loop.
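    // (E.g., a taskloop over 'for (int i = 0; i < 0; ++i)' has a
    // precondition that constant-folds to false, so no loop code is emitted
    // at all.)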
5425 bool CondConstant; 5426 llvm::BasicBlock *ContBlock = nullptr; 5427 OMPLoopScope PreInitScope(CGF, S); 5428 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 5429 if (!CondConstant) 5430 return; 5431 } else { 5432 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); 5433 ContBlock = CGF.createBasicBlock("taskloop.if.end"); 5434 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 5435 CGF.getProfileCount(&S)); 5436 CGF.EmitBlock(ThenBlock); 5437 CGF.incrementProfileCounter(&S); 5438 } 5439 5440 (void)CGF.EmitOMPLinearClauseInit(S); 5441 5442 OMPPrivateScope LoopScope(CGF); 5443 // Emit helper vars inits. 5444 enum { LowerBound = 5, UpperBound, Stride, LastIter }; 5445 auto *I = CS->getCapturedDecl()->param_begin(); 5446 auto *LBP = std::next(I, LowerBound); 5447 auto *UBP = std::next(I, UpperBound); 5448 auto *STP = std::next(I, Stride); 5449 auto *LIP = std::next(I, LastIter); 5450 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP, 5451 LoopScope); 5452 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP, 5453 LoopScope); 5454 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope); 5455 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, 5456 LoopScope); 5457 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 5458 CGF.EmitOMPLinearClause(S, LoopScope); 5459 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 5460 (void)LoopScope.Privatize(); 5461 // Emit the loop iteration variable. 5462 const Expr *IVExpr = S.getIterationVariable(); 5463 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 5464 CGF.EmitVarDecl(*IVDecl); 5465 CGF.EmitIgnoredExpr(S.getInit()); 5466 5467 // Emit the iterations count variable. 5468 // If it is not a variable, Sema decided to calculate iterations count on 5469 // each iteration (e.g., it is foldable into a constant). 5470 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 5471 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 5472 // Emit calculation of the iterations count. 5473 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 5474 } 5475 5476 { 5477 OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); 5478 emitCommonSimdLoop( 5479 CGF, S, 5480 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 5481 if (isOpenMPSimdDirective(S.getDirectiveKind())) 5482 CGF.EmitOMPSimdInit(S); 5483 }, 5484 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 5485 CGF.EmitOMPInnerLoop( 5486 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), 5487 [&S](CodeGenFunction &CGF) { 5488 CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); 5489 CGF.EmitStopPoint(&S); 5490 }, 5491 [](CodeGenFunction &) {}); 5492 }); 5493 } 5494 // Emit: if (PreCond) - end. 5495 if (ContBlock) { 5496 CGF.EmitBranch(ContBlock); 5497 CGF.EmitBlock(ContBlock, true); 5498 } 5499 // Emit final copy of the lastprivate variables if IsLastIter != 0. 
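    // (E.g., 'lastprivate(x)' copies the private 'x' back to the original
    // variable only in the task that ran the final iteration, hence the
    // guard on the is-last-iteration flag loaded below.)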
5500 if (HasLastprivateClause) { 5501 CGF.EmitOMPLastprivateClauseFinal( 5502 S, isOpenMPSimdDirective(S.getDirectiveKind()), 5503 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( 5504 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 5505 (*LIP)->getType(), S.getBeginLoc()))); 5506 } 5507 CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) { 5508 return CGF.Builder.CreateIsNotNull( 5509 CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 5510 (*LIP)->getType(), S.getBeginLoc())); 5511 }); 5512 }; 5513 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 5514 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, 5515 const OMPTaskDataTy &Data) { 5516 auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, 5517 &Data](CodeGenFunction &CGF, PrePostActionTy &) { 5518 OMPLoopScope PreInitScope(CGF, S); 5519 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S, 5520 OutlinedFn, SharedsTy, 5521 CapturedStruct, IfCond, Data); 5522 }; 5523 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, 5524 CodeGen); 5525 }; 5526 if (Data.Nogroup) { 5527 EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data); 5528 } else { 5529 CGM.getOpenMPRuntime().emitTaskgroupRegion( 5530 *this, 5531 [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, 5532 PrePostActionTy &Action) { 5533 Action.Enter(CGF); 5534 CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, 5535 Data); 5536 }, 5537 S.getBeginLoc()); 5538 } 5539 } 5540 5541 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 5542 auto LPCRegion = 5543 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 5544 EmitOMPTaskLoopBasedDirective(S); 5545 } 5546 5547 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 5548 const OMPTaskLoopSimdDirective &S) { 5549 auto LPCRegion = 5550 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 5551 OMPLexicalScope Scope(*this, S); 5552 EmitOMPTaskLoopBasedDirective(S); 5553 } 5554 5555 void CodeGenFunction::EmitOMPMasterTaskLoopDirective( 5556 const OMPMasterTaskLoopDirective &S) { 5557 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 5558 Action.Enter(CGF); 5559 EmitOMPTaskLoopBasedDirective(S); 5560 }; 5561 auto LPCRegion = 5562 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 5563 OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false); 5564 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); 5565 } 5566 5567 void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective( 5568 const OMPMasterTaskLoopSimdDirective &S) { 5569 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 5570 Action.Enter(CGF); 5571 EmitOMPTaskLoopBasedDirective(S); 5572 }; 5573 auto LPCRegion = 5574 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 5575 OMPLexicalScope Scope(*this, S); 5576 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); 5577 } 5578 5579 void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective( 5580 const OMPParallelMasterTaskLoopDirective &S) { 5581 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 5582 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, 5583 PrePostActionTy &Action) { 5584 Action.Enter(CGF); 5585 CGF.EmitOMPTaskLoopBasedDirective(S); 5586 }; 5587 OMPLexicalScope Scope(CGF, S, llvm::None, /*EmitPreInitStmt=*/false); 5588 CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen, 5589 S.getBeginLoc()); 
5590 }; 5591 auto LPCRegion = 5592 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 5593 emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen, 5594 emitEmptyBoundParameters); 5595 } 5596 5597 void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective( 5598 const OMPParallelMasterTaskLoopSimdDirective &S) { 5599 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 5600 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, 5601 PrePostActionTy &Action) { 5602 Action.Enter(CGF); 5603 CGF.EmitOMPTaskLoopBasedDirective(S); 5604 }; 5605 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); 5606 CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen, 5607 S.getBeginLoc()); 5608 }; 5609 auto LPCRegion = 5610 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 5611 emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen, 5612 emitEmptyBoundParameters); 5613 } 5614 5615 // Generate the instructions for '#pragma omp target update' directive. 5616 void CodeGenFunction::EmitOMPTargetUpdateDirective( 5617 const OMPTargetUpdateDirective &S) { 5618 // If we don't have target devices, don't bother emitting the data mapping 5619 // code. 5620 if (CGM.getLangOpts().OMPTargetTriples.empty()) 5621 return; 5622 5623 // Check if we have any if clause associated with the directive. 5624 const Expr *IfCond = nullptr; 5625 if (const auto *C = S.getSingleClause<OMPIfClause>()) 5626 IfCond = C->getCondition(); 5627 5628 // Check if we have any device clause associated with the directive. 5629 const Expr *Device = nullptr; 5630 if (const auto *C = S.getSingleClause<OMPDeviceClause>()) 5631 Device = C->getDevice(); 5632 5633 OMPLexicalScope Scope(*this, S, OMPD_task); 5634 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 5635 } 5636 5637 void CodeGenFunction::EmitSimpleOMPExecutableDirective( 5638 const OMPExecutableDirective &D) { 5639 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) 5640 return; 5641 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { 5642 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 5643 emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action); 5644 } else { 5645 OMPPrivateScope LoopGlobals(CGF); 5646 if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) { 5647 for (const Expr *E : LD->counters()) { 5648 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5649 if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) { 5650 LValue GlobLVal = CGF.EmitLValue(E); 5651 LoopGlobals.addPrivate( 5652 VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); }); 5653 } 5654 if (isa<OMPCapturedExprDecl>(VD)) { 5655 // Emit only those that were not explicitly referenced in clauses. 5656 if (!CGF.LocalDeclMap.count(VD)) 5657 CGF.EmitVarDecl(*VD); 5658 } 5659 } 5660 for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) { 5661 if (!C->getNumForLoops()) 5662 continue; 5663 for (unsigned I = LD->getCollapsedNumber(), 5664 E = C->getLoopNumIterations().size(); 5665 I < E; ++I) { 5666 if (const auto *VD = dyn_cast<OMPCapturedExprDecl>( 5667 cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) { 5668 // Emit only those that were not explicitly referenced in clauses. 
5669 if (!CGF.LocalDeclMap.count(VD)) 5670 CGF.EmitVarDecl(*VD); 5671 } 5672 } 5673 } 5674 } 5675 LoopGlobals.Privatize(); 5676 CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt()); 5677 } 5678 }; 5679 { 5680 auto LPCRegion = 5681 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D); 5682 OMPSimdLexicalScope Scope(*this, D); 5683 CGM.getOpenMPRuntime().emitInlinedDirective( 5684 *this, 5685 isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd 5686 : D.getDirectiveKind(), 5687 CodeGen); 5688 } 5689 // Check for outer lastprivate conditional update. 5690 checkForLastprivateConditionalUpdate(*this, D); 5691 } 5692