//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/AtomicOrdering.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
          return CGF.EmitLValue(&DRE).getAddress(CGF);
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of used expressions from the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    CodeGenFunction::OMPMapVars PreCondVars;
    llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
    for (const auto *E : S.counters()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      EmittedAsPrivate.insert(VD->getCanonicalDecl());
      (void)PreCondVars.setVarAddr(
          CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
    }
    // Mark private vars as undefs.
    for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
      for (const Expr *IRef : C->varlists()) {
        const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
          (void)PreCondVars.setVarAddr(
              CGF, OrigVD,
              Address(llvm::UndefValue::get(
                          CGF.ConvertTypeForMem(CGF.getContext().getPointerType(
                              OrigVD->getType().getNonReferenceType()))),
                      CGF.getContext().getDeclAlign(OrigVD)));
        }
      }
    }
    (void)PreCondVars.apply(CGF);
    // Emit init, __range and __end variables for C++ range loops.
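    // E.g., for '#pragma omp for collapse(2)' whose outer loop is
    // 'for (auto &X : Vec)', the implicit __range and __end variables of
    // that range-based loop must be emitted here so the loop precondition
    // can be evaluated before the outlined body runs.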
    const Stmt *Body =
        S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
    for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) {
      Body = OMPLoopDirective::tryToFindNextInnerLoop(
          Body, /*TryImperfectlyNestedLoops=*/true);
      if (auto *For = dyn_cast<ForStmt>(Body)) {
        Body = For->getBody();
      } else {
        assert(isa<CXXForRangeStmt>(Body) &&
               "Expected canonical for loop or range-based for loop.");
        auto *CXXFor = cast<CXXForRangeStmt>(Body);
        if (const Stmt *Init = CXXFor->getInit())
          CGF.EmitStmt(Init);
        CGF.EmitStmt(CXXFor->getRangeStmt());
        CGF.EmitStmt(CXXFor->getEndStmt());
        Body = CXXFor->getBody();
      }
    }
    if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress(CGF);
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
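    // E.g., for 'int a[n][m]' the loop below accumulates NumElts = n * m
    // and the final multiply scales it by sizeof(int) once the scalar
    // element type is reached.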
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
                  : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

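        // The net effect is a bit-preserving round trip: e.g. a captured
        // 'float' is stored through the 'float *' view of the temporary and
        // reloaded below as a pointer-sized integer that the runtime can
        // forward like any other argument.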
        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress(CGF);
  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
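  // The outlined signature mirrors the CapturedDecl's parameter list: the
  // leading parameters, then one argument per captured field in place of
  // the context parameter, then the trailing parameters.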
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. The VLA type sizes are passed to the outlined
    // function in the same way.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress(CGF);
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress(CGF)}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
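  // With full debug info the real body is emitted under the name
  // "<helper>_debug__" with the original parameter types; a thin wrapper
  // with the uintptr-based signature is then emitted below to call it.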
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
        return LocalAddrPair.second.second;
      });
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

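  // The copy below is emitted as a guarded pointer-walking loop over the
  // flattened elements: skip it entirely if the array is empty, otherwise
  // advance a source and a destination PHI until the end pointer is hit.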
  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI =
      Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

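/// 'Copy' is the pre-built assignment 'DestVD = SrcVD'. Arrays with a
/// trivial element-wise assignment are lowered to a single aggregate copy;
/// other arrays replay 'Copy' per element with the pseudo source and
/// destination variables remapped to the current elements.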
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // We are working with a single array element, so we have to
            // remap the destination and source variables to the
            // corresponding array elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
            Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

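/// Returns true if at least one emitted firstprivate variable is also
/// lastprivate and therefore still needs the final copy-back after the
/// construct.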
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit
  // an outlined function (e.g. omp for, omp simd, omp distribute).
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copy for firstprivate constant variables in target
      // regions, captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this,
                                                                    OrigVD);
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
                const Expr *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform simple memcpy.
                  LValue Dest =
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
                } else {
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(),
                      OriginalLVal.getAddress(*this), Type,
                      [this, VDInit, Init](Address DestElement,
                                           Address SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for single element.
                        setAddrOfLocalVar(VDInit, SrcElement);
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer*/ false);
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
                                               ThisFirstprivateIsLastprivate,
                                               OrigVD, &Lastprivates, IRef]() {
                // Emit private VarDecl with copy init.
                // Remap temp VDInit variable to the address of the original
                // variable (for proper handling of captured global variables).
                setAddrOfLocalVar(VDInit, OriginalAddr);
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                if (ThisFirstprivateIsLastprivate &&
                    Lastprivates[OrigVD->getCanonicalDecl()] ==
                        OMPC_LASTPRIVATE_conditional) {
                  // Create/init special variable for lastprivate conditionals.
                  Address VDAddr =
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                          *this, OrigVD);
                  llvm::Value *V = EmitLoadOfScalar(
                      MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
                                     AlignmentSource::Decl),
                      (*IRef)->getExprLoc());
                  EmitStoreOfScalar(V,
                                    MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                   AlignmentSource::Decl));
                  LocalDeclMap.erase(VD);
                  setAddrOfLocalVar(VD, VDAddr);
                  return VDAddr;
                }
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        if (CopiedVars.size() == 1) {
          // First check if the current thread is the master thread. If it is,
          // there is no need to copy the data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                          /*RefersToEnclosingVariableOrCapture=*/
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress(*this);
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C,
                                                              OrigVD]() {
            if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
              Address VDAddr =
                  CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this,
                                                                        OrigVD);
              setAddrOfLocalVar(VD, VDAddr);
              return VDAddr;
            }
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit an implicit barrier if at least one lastprivate conditional is
    // found and this is not simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
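        // Note: DestVD was remapped to the original variable's address in
        // EmitOMPLastprivateClauseInit, so its local-var slot below resolves
        // to the original storage, not to a private copy.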
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
      Data.ReductionOps.append(C->reduction_ops().begin(),
                               C->reduction_ops().end());
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    }
  }
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
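      // In the reduction ops the LHS helper variable stands for the shared
      // original and the RHS helper for the private copy, so they are bound
      // to those addresses here.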
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
  if (!Data.ReductionVars.empty()) {
    Data.IsReductionWithTaskMod = true;
    Data.IsWorksharingReduction =
        isOpenMPWorksharingDirective(D.getDirectiveKind());
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
    const Expr *TaskRedRef = nullptr;
    switch (D.getDirectiveKind()) {
    case OMPD_parallel:
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_for:
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_sections:
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for:
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_master:
      TaskRedRef =
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_sections:
      TaskRedRef =
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel:
      TaskRedRef =
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel_for:
      TaskRedRef =
          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_distribute_parallel_for:
      TaskRedRef =
          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_target_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_simd:
    case OMPD_for_simd:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_parallel_for_simd:
    case OMPD_task:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_ordered:
    case OMPD_atomic:
    case OMPD_teams:
    case OMPD_target:
    case OMPD_cancellation_point:
    case OMPD_cancel:
    case OMPD_target_data:
    case OMPD_target_enter_data:
    case OMPD_target_exit_data:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_distribute:
    case OMPD_target_update:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_distribute_simd:
    case OMPD_target_parallel_for_simd:
    case OMPD_target_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams_distribute_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_declare_simd:
    case OMPD_requires:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_unknown:
      llvm_unreachable("Unexpected directive with task reductions.");
    }

    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
    EmitVarDecl(*VD);
    EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
                      /*Volatile=*/false, TaskRedRef->getType());
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  bool IsReductionWithTaskMod = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Do not emit for inscan reductions.
1393 if (C->getModifier() == OMPC_REDUCTION_inscan) 1394 continue; 1395 HasAtLeastOneReduction = true; 1396 Privates.append(C->privates().begin(), C->privates().end()); 1397 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 1398 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 1399 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 1400 IsReductionWithTaskMod = 1401 IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task; 1402 } 1403 if (HasAtLeastOneReduction) { 1404 if (IsReductionWithTaskMod) { 1405 CGM.getOpenMPRuntime().emitTaskReductionFini( 1406 *this, D.getBeginLoc(), 1407 isOpenMPWorksharingDirective(D.getDirectiveKind())); 1408 } 1409 bool WithNowait = D.getSingleClause<OMPNowaitClause>() || 1410 isOpenMPParallelDirective(D.getDirectiveKind()) || 1411 ReductionKind == OMPD_simd; 1412 bool SimpleReduction = ReductionKind == OMPD_simd; 1413 // Emit nowait reduction if nowait clause is present or directive is a 1414 // parallel directive (it always has implicit barrier). 1415 CGM.getOpenMPRuntime().emitReduction( 1416 *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps, 1417 {WithNowait, SimpleReduction, ReductionKind}); 1418 } 1419 } 1420 1421 static void emitPostUpdateForReductionClause( 1422 CodeGenFunction &CGF, const OMPExecutableDirective &D, 1423 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { 1424 if (!CGF.HaveInsertPoint()) 1425 return; 1426 llvm::BasicBlock *DoneBB = nullptr; 1427 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1428 if (const Expr *PostUpdate = C->getPostUpdateExpr()) { 1429 if (!DoneBB) { 1430 if (llvm::Value *Cond = CondGen(CGF)) { 1431 // If the first post-update expression is found, emit conditional 1432 // block if it was requested. 1433 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu"); 1434 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done"); 1435 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1436 CGF.EmitBlock(ThenBB); 1437 } 1438 } 1439 CGF.EmitIgnoredExpr(PostUpdate); 1440 } 1441 } 1442 if (DoneBB) 1443 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 1444 } 1445 1446 namespace { 1447 /// Codegen lambda for appending distribute lower and upper bounds to outlined 1448 /// parallel function. 
This is necessary for combined constructs such as
1449 /// 'distribute parallel for'.
1450 typedef llvm::function_ref<void(CodeGenFunction &,
1451 const OMPExecutableDirective &,
1452 llvm::SmallVectorImpl<llvm::Value *> &)>
1453 CodeGenBoundParametersTy;
1454 } // anonymous namespace
1455
1456 static void
1457 checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1458 const OMPExecutableDirective &S) {
1459 if (CGF.getLangOpts().OpenMP < 50)
1460 return;
1461 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1462 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1463 for (const Expr *Ref : C->varlists()) {
1464 if (!Ref->getType()->isScalarType())
1465 continue;
1466 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1467 if (!DRE)
1468 continue;
1469 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1470 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1471 }
1472 }
1473 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1474 for (const Expr *Ref : C->varlists()) {
1475 if (!Ref->getType()->isScalarType())
1476 continue;
1477 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1478 if (!DRE)
1479 continue;
1480 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1481 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1482 }
1483 }
1484 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1485 for (const Expr *Ref : C->varlists()) {
1486 if (!Ref->getType()->isScalarType())
1487 continue;
1488 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1489 if (!DRE)
1490 continue;
1491 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1492 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1493 }
1494 }
1495 // Privates should not be analyzed since they are not captured at all.
1496 // Task reductions may be skipped - tasks are ignored.
1497 // Firstprivates do not return a value but may be passed by reference - no
1498 // need to check for updated lastprivate conditional.
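// Illustrative example (an assumption for exposition): given
//   #pragma omp parallel for lastprivate(conditional: x)
//   for (int i = 0; i < n; ++i)
//     if (a[i] > 0) x = a[i];
// the value of 'x' after the region must come from the highest iteration
// that actually assigned it, so each candidate reference above is
// registered with the runtime support; the firstprivates below are only
// recorded to exclude them from the shared-variable analysis at the end.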
1499 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1500 for (const Expr *Ref : C->varlists()) {
1501 if (!Ref->getType()->isScalarType())
1502 continue;
1503 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1504 if (!DRE)
1505 continue;
1506 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1507 }
1508 }
1509 CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1510 CGF, S, PrivateDecls);
1511 }
1512
1513 static void emitCommonOMPParallelDirective(
1514 CodeGenFunction &CGF, const OMPExecutableDirective &S,
1515 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1516 const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1517 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1518 llvm::Function *OutlinedFn =
1519 CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1520 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1521 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1522 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1523 llvm::Value *NumThreads =
1524 CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1525 /*IgnoreResultAssign=*/true);
1526 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1527 CGF, NumThreads, NumThreadsClause->getBeginLoc());
1528 }
1529 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1530 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1531 CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1532 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1533 }
1534 const Expr *IfCond = nullptr;
1535 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1536 if (C->getNameModifier() == OMPD_unknown ||
1537 C->getNameModifier() == OMPD_parallel) {
1538 IfCond = C->getCondition();
1539 break;
1540 }
1541 }
1542
1543 OMPParallelScope Scope(CGF, S);
1544 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1545 // Combining 'distribute' with 'for' requires sharing each 'distribute'
1546 // chunk's lower and upper bounds with the pragma 'for' chunking mechanism.
1547 // The following lambda takes care of appending the lower and upper bound
1548 // parameters when necessary.
1549 CodeGenBoundParameters(CGF, S, CapturedVars);
1550 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1551 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1552 CapturedVars, IfCond);
1553 }
1554
1555 static void emitEmptyBoundParameters(CodeGenFunction &,
1556 const OMPExecutableDirective &,
1557 llvm::SmallVectorImpl<llvm::Value *> &) {}
1558
1559 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1560 if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
1561 // Check if we have any if clause associated with the directive.
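// Illustrative example (an assumption for exposition):
// '#pragma omp parallel if(n > 1)' produces an i1 condition here; when it
// evaluates to false, the region is executed serially by the encountering
// thread, as the OpenMP 'if' clause semantics require.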
1562 llvm::Value *IfCond = nullptr;
1563 if (const auto *C = S.getSingleClause<OMPIfClause>())
1564 IfCond = EmitScalarExpr(C->getCondition(),
1565 /*IgnoreResultAssign=*/true);
1566
1567 llvm::Value *NumThreads = nullptr;
1568 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
1569 NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
1570 /*IgnoreResultAssign=*/true);
1571
1572 ProcBindKind ProcBind = OMP_PROC_BIND_default;
1573 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
1574 ProcBind = ProcBindClause->getProcBindKind();
1575
1576 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1577
1578 // The cleanup callback that finalizes all variables at the given location,
1579 // thus calls destructors etc.
1580 auto FiniCB = [this](InsertPointTy IP) {
1581 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
1582 };
1583
1584 // Privatization callback that performs appropriate action for
1585 // shared/private/firstprivate/lastprivate/copyin/... variables.
1586 //
1587 // TODO: This defaults to shared right now.
1588 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1589 llvm::Value &Val, llvm::Value *&ReplVal) {
1590 // The next line is appropriate only for variables (Val) with the
1591 // data-sharing attribute "shared".
1592 ReplVal = &Val;
1593
1594 return CodeGenIP;
1595 };
1596
1597 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1598 const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
1599
1600 auto BodyGenCB = [ParallelRegionBodyStmt,
1601 this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1602 llvm::BasicBlock &ContinuationBB) {
1603 OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP,
1604 ContinuationBB);
1605 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt,
1606 CodeGenIP, ContinuationBB);
1607 };
1608
1609 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
1610 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
1611 Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB,
1612 FiniCB, IfCond, NumThreads,
1613 ProcBind, S.hasCancel()));
1614 return;
1615 }
1616
1617 // Emit parallel region as a standalone region.
1618 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1619 Action.Enter(CGF);
1620 OMPPrivateScope PrivateScope(CGF);
1621 bool Copyins = CGF.EmitOMPCopyinClause(S);
1622 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1623 if (Copyins) {
1624 // Emit an implicit barrier to synchronize threads and avoid data races
1625 // on propagation of the master thread's values of threadprivate
1626 // variables to local instances of those variables in all other
1627 // implicit threads.
1627 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1628 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1629 /*ForceSimpleCall=*/true);
1630 }
1631 CGF.EmitOMPPrivateClause(S, PrivateScope);
1632 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1633 (void)PrivateScope.Privatize();
1634 CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1635 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1636 };
1637 {
1638 auto LPCRegion =
1639 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
1640 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1641 emitEmptyBoundParameters);
1642 emitPostUpdateForReductionClause(*this, S,
1643 [](CodeGenFunction &) { return nullptr; });
1644 }
1645 // Check for outer lastprivate conditional update.
1646 checkForLastprivateConditionalUpdate(*this, S);
1647 }
1648
1649 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1650 int MaxLevel, int Level = 0) {
1651 assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1652 const Stmt *SimplifiedS = S->IgnoreContainers();
1653 if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
1654 PrettyStackTraceLoc CrashInfo(
1655 CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1656 "LLVM IR generation of compound statement ('{}')");
1657
1658 // Keep track of the current cleanup stack depth, including debug scopes.
1659 CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1660 for (const Stmt *CurStmt : CS->body())
1661 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1662 return;
1663 }
1664 if (SimplifiedS == NextLoop) {
1665 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1666 S = For->getBody();
1667 } else {
1668 assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1669 "Expected canonical for loop or range-based for loop.");
1670 const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
1671 CGF.EmitStmt(CXXFor->getLoopVarStmt());
1672 S = CXXFor->getBody();
1673 }
1674 if (Level + 1 < MaxLevel) {
1675 NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
1676 S, /*TryImperfectlyNestedLoops=*/true);
1677 emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
1678 return;
1679 }
1680 }
1681 CGF.EmitStmt(S);
1682 }
1683
1684 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1685 JumpDest LoopExit) {
1686 RunCleanupsScope BodyScope(*this);
1687 // Update counter values on the current iteration.
1688 for (const Expr *UE : D.updates())
1689 EmitIgnoredExpr(UE);
1690 // Update the linear variables.
1691 // In distribute directives only loop counters may be marked as linear, so
1692 // there is no need to generate the code for them.
1693 if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1694 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1695 for (const Expr *UE : C->updates())
1696 EmitIgnoredExpr(UE);
1697 }
1698 }
1699
1700 // On a continue in the body, jump to the end.
1701 JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
1702 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1703 for (const Expr *E : D.finals_conditions()) {
1704 if (!E)
1705 continue;
1706 // Check that the loop counter in a non-rectangular nest fits into the
1707 // iteration space.
1708 llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
1709 EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
1710 getProfileCount(D.getBody()));
1711 EmitBlock(NextBB);
1712 }
1713
1714 OMPPrivateScope InscanScope(*this);
1715 EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
1716 bool IsInscanRegion = InscanScope.Privatize();
1717 if (IsInscanRegion) {
1718 // Need to remember the blocks before and after the scan directive
1719 // to dispatch them correctly depending on the clause used in
1720 // this directive, inclusive or exclusive. For the inclusive scan the
1721 // natural order of the blocks is used; for the exclusive clause the
1722 // blocks must be executed in reverse order.
1723 OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
1724 OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
1725 OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
1726 OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
1727 EmitBranch(OMPScanDispatch);
1728 EmitBlock(OMPBeforeScanBlock);
1729 }
1730
1731 // Emit loop variables for C++ range loops.
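// (Illustrative: for '#pragma omp simd collapse(2)' over range-based for
// loops, emitBody below emits each CXXForRangeStmt's loop-variable
// statement as it descends through the collapsed nest.)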
1732 const Stmt *Body = 1733 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); 1734 // Emit loop body. 1735 emitBody(*this, Body, 1736 OMPLoopDirective::tryToFindNextInnerLoop( 1737 Body, /*TryImperfectlyNestedLoops=*/true), 1738 D.getCollapsedNumber()); 1739 1740 // Jump to the dispatcher at the end of the loop body. 1741 if (IsInscanRegion) 1742 EmitBranch(OMPScanExitBlock); 1743 1744 // The end (updates/cleanups). 1745 EmitBlock(Continue.getBlock()); 1746 BreakContinueStack.pop_back(); 1747 } 1748 1749 void CodeGenFunction::EmitOMPInnerLoop( 1750 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond, 1751 const Expr *IncExpr, 1752 const llvm::function_ref<void(CodeGenFunction &)> BodyGen, 1753 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { 1754 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); 1755 1756 // Start the loop with a block that tests the condition. 1757 auto CondBlock = createBasicBlock("omp.inner.for.cond"); 1758 EmitBlock(CondBlock); 1759 const SourceRange R = S.getSourceRange(); 1760 1761 // If attributes are attached, push to the basic block with them. 1762 const auto &OMPED = cast<OMPExecutableDirective>(S); 1763 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt(); 1764 const Stmt *SS = ICS->getCapturedStmt(); 1765 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS); 1766 if (AS) 1767 LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), 1768 AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()), 1769 SourceLocToDebugLoc(R.getEnd())); 1770 else 1771 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1772 SourceLocToDebugLoc(R.getEnd())); 1773 1774 // If there are any cleanups between here and the loop-exit scope, 1775 // create a block to stage a loop exit along. 1776 llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); 1777 if (RequiresCleanup) 1778 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); 1779 1780 llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body"); 1781 1782 // Emit condition. 1783 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); 1784 if (ExitBlock != LoopExit.getBlock()) { 1785 EmitBlock(ExitBlock); 1786 EmitBranchThroughCleanup(LoopExit); 1787 } 1788 1789 EmitBlock(LoopBody); 1790 incrementProfileCounter(&S); 1791 1792 // Create a block for the increment. 1793 JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); 1794 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1795 1796 BodyGen(*this); 1797 1798 // Emit "IV = IV + 1" and a back-edge to the condition block. 1799 EmitBlock(Continue.getBlock()); 1800 EmitIgnoredExpr(IncExpr); 1801 PostIncGen(*this); 1802 BreakContinueStack.pop_back(); 1803 EmitBranch(CondBlock); 1804 LoopStack.pop(); 1805 // Emit the fall-through block. 1806 EmitBlock(LoopExit.getBlock()); 1807 } 1808 1809 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { 1810 if (!HaveInsertPoint()) 1811 return false; 1812 // Emit inits for the linear variables. 
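// Illustrative example (an assumption for exposition): for
// '#pragma omp for linear(x:st)' the private copy of 'x' is emitted here,
// initialized from the original variable when the init expression refers
// back to it, and the non-constant step 'st', if any, is computed once
// below before the loop.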
1813 bool HasLinears = false; 1814 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1815 for (const Expr *Init : C->inits()) { 1816 HasLinears = true; 1817 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); 1818 if (const auto *Ref = 1819 dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { 1820 AutoVarEmission Emission = EmitAutoVarAlloca(*VD); 1821 const auto *OrigVD = cast<VarDecl>(Ref->getDecl()); 1822 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), 1823 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1824 VD->getInit()->getType(), VK_LValue, 1825 VD->getInit()->getExprLoc()); 1826 EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), 1827 VD->getType()), 1828 /*capturedByInit=*/false); 1829 EmitAutoVarCleanups(Emission); 1830 } else { 1831 EmitVarDecl(*VD); 1832 } 1833 } 1834 // Emit the linear steps for the linear clauses. 1835 // If a step is not constant, it is pre-calculated before the loop. 1836 if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep())) 1837 if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) { 1838 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); 1839 // Emit calculation of the linear step. 1840 EmitIgnoredExpr(CS); 1841 } 1842 } 1843 return HasLinears; 1844 } 1845 1846 void CodeGenFunction::EmitOMPLinearClauseFinal( 1847 const OMPLoopDirective &D, 1848 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { 1849 if (!HaveInsertPoint()) 1850 return; 1851 llvm::BasicBlock *DoneBB = nullptr; 1852 // Emit the final values of the linear variables. 1853 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1854 auto IC = C->varlist_begin(); 1855 for (const Expr *F : C->finals()) { 1856 if (!DoneBB) { 1857 if (llvm::Value *Cond = CondGen(*this)) { 1858 // If the first post-update expression is found, emit conditional 1859 // block if it was requested. 
1860 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu"); 1861 DoneBB = createBasicBlock(".omp.linear.pu.done"); 1862 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1863 EmitBlock(ThenBB); 1864 } 1865 } 1866 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); 1867 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), 1868 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1869 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); 1870 Address OrigAddr = EmitLValue(&DRE).getAddress(*this); 1871 CodeGenFunction::OMPPrivateScope VarScope(*this); 1872 VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; }); 1873 (void)VarScope.Privatize(); 1874 EmitIgnoredExpr(F); 1875 ++IC; 1876 } 1877 if (const Expr *PostUpdate = C->getPostUpdateExpr()) 1878 EmitIgnoredExpr(PostUpdate); 1879 } 1880 if (DoneBB) 1881 EmitBlock(DoneBB, /*IsFinished=*/true); 1882 } 1883 1884 static void emitAlignedClause(CodeGenFunction &CGF, 1885 const OMPExecutableDirective &D) { 1886 if (!CGF.HaveInsertPoint()) 1887 return; 1888 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { 1889 llvm::APInt ClauseAlignment(64, 0); 1890 if (const Expr *AlignmentExpr = Clause->getAlignment()) { 1891 auto *AlignmentCI = 1892 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); 1893 ClauseAlignment = AlignmentCI->getValue(); 1894 } 1895 for (const Expr *E : Clause->varlists()) { 1896 llvm::APInt Alignment(ClauseAlignment); 1897 if (Alignment == 0) { 1898 // OpenMP [2.8.1, Description] 1899 // If no optional parameter is specified, implementation-defined default 1900 // alignments for SIMD instructions on the target platforms are assumed. 1901 Alignment = 1902 CGF.getContext() 1903 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( 1904 E->getType()->getPointeeType())) 1905 .getQuantity(); 1906 } 1907 assert((Alignment == 0 || Alignment.isPowerOf2()) && 1908 "alignment is not power of 2"); 1909 if (Alignment != 0) { 1910 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 1911 CGF.emitAlignmentAssumption( 1912 PtrValue, E, /*No second loc needed*/ SourceLocation(), 1913 llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment)); 1914 } 1915 } 1916 } 1917 } 1918 1919 void CodeGenFunction::EmitOMPPrivateLoopCounters( 1920 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { 1921 if (!HaveInsertPoint()) 1922 return; 1923 auto I = S.private_counters().begin(); 1924 for (const Expr *E : S.counters()) { 1925 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1926 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 1927 // Emit var without initialization. 1928 AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD); 1929 EmitAutoVarCleanups(VarEmission); 1930 LocalDeclMap.erase(PrivateVD); 1931 (void)LoopScope.addPrivate(VD, [&VarEmission]() { 1932 return VarEmission.getAllocatedAddress(); 1933 }); 1934 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || 1935 VD->hasGlobalStorage()) { 1936 (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() { 1937 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), 1938 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), 1939 E->getType(), VK_LValue, E->getExprLoc()); 1940 return EmitLValue(&DRE).getAddress(*this); 1941 }); 1942 } else { 1943 (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() { 1944 return VarEmission.getAllocatedAddress(); 1945 }); 1946 } 1947 ++I; 1948 } 1949 // Privatize extra loop counters used in loops for ordered(n) clauses. 
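// Illustrative example (an assumption for exposition): for
// '#pragma omp for ordered(2)' on a doubly nested loop, the inner loop
// counter takes part in doacross dependences even though only one loop is
// workshared, so it is privatized here when it is captured from an
// enclosing scope.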
1950 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) { 1951 if (!C->getNumForLoops()) 1952 continue; 1953 for (unsigned I = S.getCollapsedNumber(), 1954 E = C->getLoopNumIterations().size(); 1955 I < E; ++I) { 1956 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I)); 1957 const auto *VD = cast<VarDecl>(DRE->getDecl()); 1958 // Override only those variables that can be captured to avoid re-emission 1959 // of the variables declared within the loops. 1960 if (DRE->refersToEnclosingVariableOrCapture()) { 1961 (void)LoopScope.addPrivate(VD, [this, DRE, VD]() { 1962 return CreateMemTemp(DRE->getType(), VD->getName()); 1963 }); 1964 } 1965 } 1966 } 1967 } 1968 1969 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 1970 const Expr *Cond, llvm::BasicBlock *TrueBlock, 1971 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 1972 if (!CGF.HaveInsertPoint()) 1973 return; 1974 { 1975 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 1976 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); 1977 (void)PreCondScope.Privatize(); 1978 // Get initial values of real counters. 1979 for (const Expr *I : S.inits()) { 1980 CGF.EmitIgnoredExpr(I); 1981 } 1982 } 1983 // Create temp loop control variables with their init values to support 1984 // non-rectangular loops. 1985 CodeGenFunction::OMPMapVars PreCondVars; 1986 for (const Expr * E: S.dependent_counters()) { 1987 if (!E) 1988 continue; 1989 assert(!E->getType().getNonReferenceType()->isRecordType() && 1990 "dependent counter must not be an iterator."); 1991 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1992 Address CounterAddr = 1993 CGF.CreateMemTemp(VD->getType().getNonReferenceType()); 1994 (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr); 1995 } 1996 (void)PreCondVars.apply(CGF); 1997 for (const Expr *E : S.dependent_inits()) { 1998 if (!E) 1999 continue; 2000 CGF.EmitIgnoredExpr(E); 2001 } 2002 // Check that loop is executed at least one time. 2003 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); 2004 PreCondVars.restore(CGF); 2005 } 2006 2007 void CodeGenFunction::EmitOMPLinearClause( 2008 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { 2009 if (!HaveInsertPoint()) 2010 return; 2011 llvm::DenseSet<const VarDecl *> SIMDLCVs; 2012 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 2013 const auto *LoopDirective = cast<OMPLoopDirective>(&D); 2014 for (const Expr *C : LoopDirective->counters()) { 2015 SIMDLCVs.insert( 2016 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 2017 } 2018 } 2019 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 2020 auto CurPrivate = C->privates().begin(); 2021 for (const Expr *E : C->varlists()) { 2022 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2023 const auto *PrivateVD = 2024 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); 2025 if (!SIMDLCVs.count(VD->getCanonicalDecl())) { 2026 bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() { 2027 // Emit private VarDecl with copy init. 2028 EmitVarDecl(*PrivateVD); 2029 return GetAddrOfLocalVar(PrivateVD); 2030 }); 2031 assert(IsRegistered && "linear var already registered as private"); 2032 // Silence the warning about unused variable. 
2033 (void)IsRegistered;
2034 } else {
2035 EmitVarDecl(*PrivateVD);
2036 }
2037 ++CurPrivate;
2038 }
2039 }
2040 }
2041
2042 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2043 const OMPExecutableDirective &D,
2044 bool IsMonotonic) {
2045 if (!CGF.HaveInsertPoint())
2046 return;
2047 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2048 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2049 /*ignoreResult=*/true);
2050 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2051 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2052 // In the presence of a finite 'safelen', it may be unsafe to mark all
2053 // the memory instructions parallel, because loop-carried
2054 // dependences of 'safelen' iterations are possible.
2055 if (!IsMonotonic)
2056 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2057 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2058 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2059 /*ignoreResult=*/true);
2060 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2061 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2062 // In the presence of a finite 'safelen', it may be unsafe to mark all
2063 // the memory instructions parallel, because loop-carried
2064 // dependences of 'safelen' iterations are possible.
2065 CGF.LoopStack.setParallel(/*Enable=*/false);
2066 }
2067 }
2068
2069 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
2070 bool IsMonotonic) {
2071 // Walk clauses and process simdlen/safelen/order clauses.
2072 LoopStack.setParallel(!IsMonotonic);
2073 LoopStack.setVectorizeEnable();
2074 emitSimdlenSafelenClause(*this, D, IsMonotonic);
2075 if (const auto *C = D.getSingleClause<OMPOrderClause>())
2076 if (C->getKind() == OMPC_ORDER_concurrent)
2077 LoopStack.setParallel(/*Enable=*/true);
2078 }
2079
2080 void CodeGenFunction::EmitOMPSimdFinal(
2081 const OMPLoopDirective &D,
2082 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2083 if (!HaveInsertPoint())
2084 return;
2085 llvm::BasicBlock *DoneBB = nullptr;
2086 auto IC = D.counters().begin();
2087 auto IPC = D.private_counters().begin();
2088 for (const Expr *F : D.finals()) {
2089 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
2090 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
2091 const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
2092 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
2093 OrigVD->hasGlobalStorage() || CED) {
2094 if (!DoneBB) {
2095 if (llvm::Value *Cond = CondGen(*this)) {
2096 // If the first post-update expression is found, emit conditional
2097 // block if it was requested.
2098 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then"); 2099 DoneBB = createBasicBlock(".omp.final.done"); 2100 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 2101 EmitBlock(ThenBB); 2102 } 2103 } 2104 Address OrigAddr = Address::invalid(); 2105 if (CED) { 2106 OrigAddr = 2107 EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this); 2108 } else { 2109 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD), 2110 /*RefersToEnclosingVariableOrCapture=*/false, 2111 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); 2112 OrigAddr = EmitLValue(&DRE).getAddress(*this); 2113 } 2114 OMPPrivateScope VarScope(*this); 2115 VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; }); 2116 (void)VarScope.Privatize(); 2117 EmitIgnoredExpr(F); 2118 } 2119 ++IC; 2120 ++IPC; 2121 } 2122 if (DoneBB) 2123 EmitBlock(DoneBB, /*IsFinished=*/true); 2124 } 2125 2126 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, 2127 const OMPLoopDirective &S, 2128 CodeGenFunction::JumpDest LoopExit) { 2129 CGF.EmitOMPLoopBody(S, LoopExit); 2130 CGF.EmitStopPoint(&S); 2131 } 2132 2133 /// Emit a helper variable and return corresponding lvalue. 2134 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 2135 const DeclRefExpr *Helper) { 2136 auto VDecl = cast<VarDecl>(Helper->getDecl()); 2137 CGF.EmitVarDecl(*VDecl); 2138 return CGF.EmitLValue(Helper); 2139 } 2140 2141 static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S, 2142 const RegionCodeGenTy &SimdInitGen, 2143 const RegionCodeGenTy &BodyCodeGen) { 2144 auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF, 2145 PrePostActionTy &) { 2146 CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S); 2147 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 2148 SimdInitGen(CGF); 2149 2150 BodyCodeGen(CGF); 2151 }; 2152 auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 2153 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 2154 CGF.LoopStack.setVectorizeEnable(/*Enable=*/false); 2155 2156 BodyCodeGen(CGF); 2157 }; 2158 const Expr *IfCond = nullptr; 2159 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2160 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2161 if (CGF.getLangOpts().OpenMP >= 50 && 2162 (C->getNameModifier() == OMPD_unknown || 2163 C->getNameModifier() == OMPD_simd)) { 2164 IfCond = C->getCondition(); 2165 break; 2166 } 2167 } 2168 } 2169 if (IfCond) { 2170 CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2171 } else { 2172 RegionCodeGenTy ThenRCG(ThenGen); 2173 ThenRCG(CGF); 2174 } 2175 } 2176 2177 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, 2178 PrePostActionTy &Action) { 2179 Action.Enter(CGF); 2180 assert(isOpenMPSimdDirective(S.getDirectiveKind()) && 2181 "Expected simd directive"); 2182 OMPLoopScope PreInitScope(CGF, S); 2183 // if (PreCond) { 2184 // for (IV in 0..LastIteration) BODY; 2185 // <Final counter/linear vars updates>; 2186 // } 2187 // 2188 if (isOpenMPDistributeDirective(S.getDirectiveKind()) || 2189 isOpenMPWorksharingDirective(S.getDirectiveKind()) || 2190 isOpenMPTaskLoopDirective(S.getDirectiveKind())) { 2191 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable())); 2192 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable())); 2193 } 2194 2195 // Emit: if (PreCond) - begin. 2196 // If the condition constant folds and can be elided, avoid emitting the 2197 // whole loop. 
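// E.g. (illustrative) a nest whose precondition folds to 'false', such as
// 'for (int i = 0; i < 0; ++i)', is skipped entirely here.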
2198 bool CondConstant; 2199 llvm::BasicBlock *ContBlock = nullptr; 2200 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2201 if (!CondConstant) 2202 return; 2203 } else { 2204 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then"); 2205 ContBlock = CGF.createBasicBlock("simd.if.end"); 2206 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 2207 CGF.getProfileCount(&S)); 2208 CGF.EmitBlock(ThenBlock); 2209 CGF.incrementProfileCounter(&S); 2210 } 2211 2212 // Emit the loop iteration variable. 2213 const Expr *IVExpr = S.getIterationVariable(); 2214 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 2215 CGF.EmitVarDecl(*IVDecl); 2216 CGF.EmitIgnoredExpr(S.getInit()); 2217 2218 // Emit the iterations count variable. 2219 // If it is not a variable, Sema decided to calculate iterations count on 2220 // each iteration (e.g., it is foldable into a constant). 2221 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2222 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2223 // Emit calculation of the iterations count. 2224 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 2225 } 2226 2227 emitAlignedClause(CGF, S); 2228 (void)CGF.EmitOMPLinearClauseInit(S); 2229 { 2230 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2231 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 2232 CGF.EmitOMPLinearClause(S, LoopScope); 2233 CGF.EmitOMPPrivateClause(S, LoopScope); 2234 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2235 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( 2236 CGF, S, CGF.EmitLValue(S.getIterationVariable())); 2237 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2238 (void)LoopScope.Privatize(); 2239 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 2240 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 2241 2242 emitCommonSimdLoop( 2243 CGF, S, 2244 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2245 CGF.EmitOMPSimdInit(S); 2246 }, 2247 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 2248 CGF.EmitOMPInnerLoop( 2249 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), 2250 [&S](CodeGenFunction &CGF) { 2251 emitOMPLoopBodyWithStopPoint(CGF, S, 2252 CodeGenFunction::JumpDest()); 2253 }, 2254 [](CodeGenFunction &) {}); 2255 }); 2256 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; }); 2257 // Emit final copy of the lastprivate variables at the end of loops. 2258 if (HasLastprivateClause) 2259 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 2260 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); 2261 emitPostUpdateForReductionClause(CGF, S, 2262 [](CodeGenFunction &) { return nullptr; }); 2263 } 2264 CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; }); 2265 // Emit: if (PreCond) - end. 2266 if (ContBlock) { 2267 CGF.EmitBranch(ContBlock); 2268 CGF.EmitBlock(ContBlock, true); 2269 } 2270 } 2271 2272 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 2273 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2274 emitOMPSimdRegion(CGF, S, Action); 2275 }; 2276 { 2277 auto LPCRegion = 2278 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 2279 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2280 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2281 } 2282 // Check for outer lastprivate conditional update. 
2283 checkForLastprivateConditionalUpdate(*this, S); 2284 } 2285 2286 void CodeGenFunction::EmitOMPOuterLoop( 2287 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 2288 CodeGenFunction::OMPPrivateScope &LoopScope, 2289 const CodeGenFunction::OMPLoopArguments &LoopArgs, 2290 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 2291 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { 2292 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2293 2294 const Expr *IVExpr = S.getIterationVariable(); 2295 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2296 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2297 2298 JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 2299 2300 // Start the loop with a block that tests the condition. 2301 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond"); 2302 EmitBlock(CondBlock); 2303 const SourceRange R = S.getSourceRange(); 2304 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 2305 SourceLocToDebugLoc(R.getEnd())); 2306 2307 llvm::Value *BoolCondVal = nullptr; 2308 if (!DynamicOrOrdered) { 2309 // UB = min(UB, GlobalUB) or 2310 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 2311 // 'distribute parallel for') 2312 EmitIgnoredExpr(LoopArgs.EUB); 2313 // IV = LB 2314 EmitIgnoredExpr(LoopArgs.Init); 2315 // IV < UB 2316 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); 2317 } else { 2318 BoolCondVal = 2319 RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL, 2320 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); 2321 } 2322 2323 // If there are any cleanups between here and the loop-exit scope, 2324 // create a block to stage a loop exit along. 2325 llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); 2326 if (LoopScope.requiresCleanups()) 2327 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 2328 2329 llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body"); 2330 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 2331 if (ExitBlock != LoopExit.getBlock()) { 2332 EmitBlock(ExitBlock); 2333 EmitBranchThroughCleanup(LoopExit); 2334 } 2335 EmitBlock(LoopBody); 2336 2337 // Emit "IV = LB" (in case of static schedule, we have already calculated new 2338 // LB for loop condition and emitted it above). 2339 if (DynamicOrOrdered) 2340 EmitIgnoredExpr(LoopArgs.Init); 2341 2342 // Create a block for the increment. 2343 JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 2344 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 2345 2346 emitCommonSimdLoop( 2347 *this, S, 2348 [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { 2349 // Generate !llvm.loop.parallel metadata for loads and stores for loops 2350 // with dynamic/guided scheduling and without ordered clause. 2351 if (!isOpenMPSimdDirective(S.getDirectiveKind())) { 2352 CGF.LoopStack.setParallel(!IsMonotonic); 2353 if (const auto *C = S.getSingleClause<OMPOrderClause>()) 2354 if (C->getKind() == OMPC_ORDER_concurrent) 2355 CGF.LoopStack.setParallel(/*Enable=*/true); 2356 } else { 2357 CGF.EmitOMPSimdInit(S, IsMonotonic); 2358 } 2359 }, 2360 [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, 2361 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 2362 SourceLocation Loc = S.getBeginLoc(); 2363 // when 'distribute' is not combined with a 'for': 2364 // while (idx <= UB) { BODY; ++idx; } 2365 // when 'distribute' is combined with a 'for' 2366 // (e.g. 
'distribute parallel for') 2367 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 2368 CGF.EmitOMPInnerLoop( 2369 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, 2370 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 2371 CodeGenLoop(CGF, S, LoopExit); 2372 }, 2373 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { 2374 CodeGenOrdered(CGF, Loc, IVSize, IVSigned); 2375 }); 2376 }); 2377 2378 EmitBlock(Continue.getBlock()); 2379 BreakContinueStack.pop_back(); 2380 if (!DynamicOrOrdered) { 2381 // Emit "LB = LB + Stride", "UB = UB + Stride". 2382 EmitIgnoredExpr(LoopArgs.NextLB); 2383 EmitIgnoredExpr(LoopArgs.NextUB); 2384 } 2385 2386 EmitBranch(CondBlock); 2387 LoopStack.pop(); 2388 // Emit the fall-through block. 2389 EmitBlock(LoopExit.getBlock()); 2390 2391 // Tell the runtime we are done. 2392 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { 2393 if (!DynamicOrOrdered) 2394 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 2395 S.getDirectiveKind()); 2396 }; 2397 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2398 } 2399 2400 void CodeGenFunction::EmitOMPForOuterLoop( 2401 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 2402 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 2403 const OMPLoopArguments &LoopArgs, 2404 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 2405 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2406 2407 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 2408 const bool DynamicOrOrdered = 2409 Ordered || RT.isDynamic(ScheduleKind.Schedule); 2410 2411 assert((Ordered || 2412 !RT.isStaticNonchunked(ScheduleKind.Schedule, 2413 LoopArgs.Chunk != nullptr)) && 2414 "static non-chunked schedule does not need outer loop"); 2415 2416 // Emit outer loop. 2417 // 2418 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2419 // When schedule(dynamic,chunk_size) is specified, the iterations are 2420 // distributed to threads in the team in chunks as the threads request them. 2421 // Each thread executes a chunk of iterations, then requests another chunk, 2422 // until no chunks remain to be distributed. Each chunk contains chunk_size 2423 // iterations, except for the last chunk to be distributed, which may have 2424 // fewer iterations. When no chunk_size is specified, it defaults to 1. 2425 // 2426 // When schedule(guided,chunk_size) is specified, the iterations are assigned 2427 // to threads in the team in chunks as the executing threads request them. 2428 // Each thread executes a chunk of iterations, then requests another chunk, 2429 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 2430 // each chunk is proportional to the number of unassigned iterations divided 2431 // by the number of threads in the team, decreasing to 1. For a chunk_size 2432 // with value k (greater than 1), the size of each chunk is determined in the 2433 // same way, with the restriction that the chunks do not contain fewer than k 2434 // iterations (except for the last chunk to be assigned, which may have fewer 2435 // than k iterations). 2436 // 2437 // When schedule(auto) is specified, the decision regarding scheduling is 2438 // delegated to the compiler and/or runtime system. The programmer gives the 2439 // implementation the freedom to choose any possible mapping of iterations to 2440 // threads in the team. 
2441 //
2442 // When schedule(runtime) is specified, the decision regarding scheduling is
2443 // deferred until run time, and the schedule and chunk size are taken from the
2444 // run-sched-var ICV. If the ICV is set to auto, the schedule is
2445 // implementation defined.
2446 //
2447 // while(__kmpc_dispatch_next(&LB, &UB)) {
2448 // idx = LB;
2449 // while (idx <= UB) { BODY; ++idx;
2450 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
2451 // } // inner loop
2452 // }
2453 //
2454 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2455 // When schedule(static, chunk_size) is specified, iterations are divided into
2456 // chunks of size chunk_size, and the chunks are assigned to the threads in
2457 // the team in a round-robin fashion in the order of the thread number.
2458 //
2459 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2460 // while (idx <= UB) { BODY; ++idx; } // inner loop
2461 // LB = LB + ST;
2462 // UB = UB + ST;
2463 // }
2464 //
2465
2466 const Expr *IVExpr = S.getIterationVariable();
2467 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2468 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2469
2470 if (DynamicOrOrdered) {
2471 const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
2472 CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
2473 llvm::Value *LBVal = DispatchBounds.first;
2474 llvm::Value *UBVal = DispatchBounds.second;
2475 CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
2476 LoopArgs.Chunk};
2477 RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
2478 IVSigned, Ordered, DispatchRTInputValues);
2479 } else {
2480 CGOpenMPRuntime::StaticRTInput StaticInit(
2481 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
2482 LoopArgs.ST, LoopArgs.Chunk);
2483 RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
2484 ScheduleKind, StaticInit);
2485 }
2486
2487 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
2488 const unsigned IVSize,
2489 const bool IVSigned) {
2490 if (Ordered) {
2491 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
2492 IVSigned);
2493 }
2494 };
2495
2496 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
2497 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
2498 OuterLoopArgs.IncExpr = S.getInc();
2499 OuterLoopArgs.Init = S.getInit();
2500 OuterLoopArgs.Cond = S.getCond();
2501 OuterLoopArgs.NextLB = S.getNextLowerBound();
2502 OuterLoopArgs.NextUB = S.getNextUpperBound();
2503 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
2504 emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
2505 }
2506
2507 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
2508 const unsigned IVSize, const bool IVSigned) {}
2509
2510 void CodeGenFunction::EmitOMPDistributeOuterLoop(
2511 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
2512 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
2513 const CodeGenLoopTy &CodeGenLoopContent) {
2514
2515 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2516
2517 // Emit outer loop.
2518 // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
2519 // dynamic.
2520 //
2521
2522 const Expr *IVExpr = S.getIterationVariable();
2523 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2524 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2525
2526 CGOpenMPRuntime::StaticRTInput StaticInit(
2527 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
2528 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
2529 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
2530
2531 // For combined 'distribute' and 'for' the increment expression of distribute
2532 // is stored in DistInc. For 'distribute' alone, it is in Inc.
2533 Expr *IncExpr;
2534 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
2535 IncExpr = S.getDistInc();
2536 else
2537 IncExpr = S.getInc();
2538
2539 // This routine is shared by 'omp distribute parallel for' and
2540 // 'omp distribute': select the right EUB expression depending on the
2541 // directive.
2542 OMPLoopArguments OuterLoopArgs;
2543 OuterLoopArgs.LB = LoopArgs.LB;
2544 OuterLoopArgs.UB = LoopArgs.UB;
2545 OuterLoopArgs.ST = LoopArgs.ST;
2546 OuterLoopArgs.IL = LoopArgs.IL;
2547 OuterLoopArgs.Chunk = LoopArgs.Chunk;
2548 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2549 ? S.getCombinedEnsureUpperBound()
2550 : S.getEnsureUpperBound();
2551 OuterLoopArgs.IncExpr = IncExpr;
2552 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2553 ? S.getCombinedInit()
2554 : S.getInit();
2555 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2556 ? S.getCombinedCond()
2557 : S.getCond();
2558 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2559 ? S.getCombinedNextLowerBound()
2560 : S.getNextLowerBound();
2561 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2562 ? S.getCombinedNextUpperBound()
2563 : S.getNextUpperBound();
2564
2565 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2566 LoopScope, OuterLoopArgs, CodeGenLoopContent,
2567 emitEmptyOrdered);
2568 }
2569
2570 static std::pair<LValue, LValue>
2571 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
2572 const OMPExecutableDirective &S) {
2573 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2574 LValue LB =
2575 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2576 LValue UB =
2577 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2578
2579 // When composing 'distribute' with 'for' (e.g. as in 'distribute
2580 // parallel for') we need to use the 'distribute'
2581 // chunk's lower and upper bounds rather than the whole loop iteration
2582 // space. These are parameters to the outlined function for 'parallel'
2583 // and we copy the bounds of the previous schedule into
2584 // the current ones.
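// Illustrative shape of the combined lowering (an assumption for
// exposition):
//   distribute:    computes the team's chunk [PrevLB, PrevUB]
//   parallel for:  LB = PrevLB; UB = PrevUB; the worksharing schedule then
//                  splits [LB, UB] among the team's threads instead of the
//                  full [0, LastIteration] space.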
2585 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
2586 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
2587 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
2588 PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
2589 PrevLBVal = CGF.EmitScalarConversion(
2590 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
2591 LS.getIterationVariable()->getType(),
2592 LS.getPrevLowerBoundVariable()->getExprLoc());
2593 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
2594 PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
2595 PrevUBVal = CGF.EmitScalarConversion(
2596 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
2597 LS.getIterationVariable()->getType(),
2598 LS.getPrevUpperBoundVariable()->getExprLoc());
2599
2600 CGF.EmitStoreOfScalar(PrevLBVal, LB);
2601 CGF.EmitStoreOfScalar(PrevUBVal, UB);
2602
2603 return {LB, UB};
2604 }
2605
2606 /// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided), then
2607 /// we need to use the LB and UB expressions generated by the worksharing
2608 /// code generation support, whereas in non-combined situations we would
2609 /// just emit 0 and the LastIteration expression.
2610 /// This function is necessary due to the difference of the LB and UB
2611 /// types for the RT emission routines for 'for_static_init' and
2612 /// 'for_dispatch_init'.
2613 static std::pair<llvm::Value *, llvm::Value *>
2614 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2615 const OMPExecutableDirective &S,
2616 Address LB, Address UB) {
2617 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2618 const Expr *IVExpr = LS.getIterationVariable();
2619 // When implementing a dynamic schedule for a 'for' combined with a
2620 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2621 // is not normalized as each team only executes its own assigned
2622 // distribute chunk.
2623 QualType IteratorTy = IVExpr->getType();
2624 llvm::Value *LBVal =
2625 CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2626 llvm::Value *UBVal =
2627 CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2628 return {LBVal, UBVal};
2629 }
2630
2631 static void emitDistributeParallelForDistributeInnerBoundParams(
2632 CodeGenFunction &CGF, const OMPExecutableDirective &S,
2633 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
2634 const auto &Dir = cast<OMPLoopDirective>(S);
2635 LValue LB =
2636 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
2637 llvm::Value *LBCast =
2638 CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
2639 CGF.SizeTy, /*isSigned=*/false);
2640 CapturedVars.push_back(LBCast);
2641 LValue UB =
2642 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2643
2644 llvm::Value *UBCast =
2645 CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
2646 CGF.SizeTy, /*isSigned=*/false);
2647 CapturedVars.push_back(UBCast);
2648 }
2649
2650 static void
2651 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2652 const OMPLoopDirective &S,
2653 CodeGenFunction::JumpDest LoopExit) {
2654 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2655 PrePostActionTy &Action) {
2656 Action.Enter(CGF);
2657 bool HasCancel = false;
2658 if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2659 if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
2660 HasCancel = D->hasCancel();
2661 else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
2662 HasCancel =
D->hasCancel();
2663 else if (const auto *D =
2664 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
2665 HasCancel = D->hasCancel();
2666 }
2667 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
2668 HasCancel);
2669 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2670 emitDistributeParallelForInnerBounds,
2671 emitDistributeParallelForDispatchBounds);
2672 };
2673
2674 emitCommonOMPParallelDirective(
2675 CGF, S,
2676 isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
2677 CGInlinedWorksharingLoop,
2678 emitDistributeParallelForDistributeInnerBoundParams);
2679 }
2680
2681 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2682 const OMPDistributeParallelForDirective &S) {
2683 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2684 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2685 S.getDistInc());
2686 };
2687 OMPLexicalScope Scope(*this, S, OMPD_parallel);
2688 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2689 }
2690
2691 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2692 const OMPDistributeParallelForSimdDirective &S) {
2693 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2694 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2695 S.getDistInc());
2696 };
2697 OMPLexicalScope Scope(*this, S, OMPD_parallel);
2698 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2699 }
2700
2701 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2702 const OMPDistributeSimdDirective &S) {
2703 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2704 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
2705 };
2706 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2707 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2708 }
2709
2710 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
2711 CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
2712 // Emit SPMD target simd region as a standalone region.
2713 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2714 emitOMPSimdRegion(CGF, S, Action);
2715 };
2716 llvm::Function *Fn;
2717 llvm::Constant *Addr;
2718 // Emit target region as a standalone region.
2719 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
2720 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
2721 assert(Fn && Addr && "Target device function emission failed.");
2722 }
2723
2724 void CodeGenFunction::EmitOMPTargetSimdDirective(
2725 const OMPTargetSimdDirective &S) {
2726 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2727 emitOMPSimdRegion(CGF, S, Action);
2728 };
2729 emitCommonOMPTargetDirective(*this, S, CodeGen);
2730 }
2731
2732 namespace {
2733 struct ScheduleKindModifiersTy {
2734 OpenMPScheduleClauseKind Kind;
2735 OpenMPScheduleClauseModifier M1;
2736 OpenMPScheduleClauseModifier M2;
2737 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2738 OpenMPScheduleClauseModifier M1,
2739 OpenMPScheduleClauseModifier M2)
2740 : Kind(Kind), M1(M1), M2(M2) {}
2741 };
2742 } // namespace
2743
2744 bool CodeGenFunction::EmitOMPWorksharingLoop(
2745 const OMPLoopDirective &S, Expr *EUB,
2746 const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2747 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2748 // Emit the loop iteration variable.
2749 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2750 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2751 EmitVarDecl(*IVDecl); 2752 2753 // Emit the iterations count variable. 2754 // If it is not a variable, Sema decided to calculate iterations count on each 2755 // iteration (e.g., it is foldable into a constant). 2756 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2757 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2758 // Emit calculation of the iterations count. 2759 EmitIgnoredExpr(S.getCalcLastIteration()); 2760 } 2761 2762 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2763 2764 bool HasLastprivateClause; 2765 // Check pre-condition. 2766 { 2767 OMPLoopScope PreInitScope(*this, S); 2768 // Skip the entire loop if we don't meet the precondition. 2769 // If the condition constant folds and can be elided, avoid emitting the 2770 // whole loop. 2771 bool CondConstant; 2772 llvm::BasicBlock *ContBlock = nullptr; 2773 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2774 if (!CondConstant) 2775 return false; 2776 } else { 2777 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then"); 2778 ContBlock = createBasicBlock("omp.precond.end"); 2779 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2780 getProfileCount(&S)); 2781 EmitBlock(ThenBlock); 2782 incrementProfileCounter(&S); 2783 } 2784 2785 RunCleanupsScope DoacrossCleanupScope(*this); 2786 bool Ordered = false; 2787 if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 2788 if (OrderedClause->getNumForLoops()) 2789 RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations()); 2790 else 2791 Ordered = true; 2792 } 2793 2794 llvm::DenseSet<const Expr *> EmittedFinals; 2795 emitAlignedClause(*this, S); 2796 bool HasLinears = EmitOMPLinearClauseInit(S); 2797 // Emit helper vars inits. 2798 2799 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); 2800 LValue LB = Bounds.first; 2801 LValue UB = Bounds.second; 2802 LValue ST = 2803 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2804 LValue IL = 2805 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2806 2807 // Emit 'then' code. 2808 { 2809 OMPPrivateScope LoopScope(*this); 2810 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) { 2811 // Emit implicit barrier to synchronize threads and avoid data races on 2812 // initialization of firstprivate variables and post-update of 2813 // lastprivate variables. 2814 CGM.getOpenMPRuntime().emitBarrierCall( 2815 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 2816 /*ForceSimpleCall=*/true); 2817 } 2818 EmitOMPPrivateClause(S, LoopScope); 2819 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( 2820 *this, S, EmitLValue(S.getIterationVariable())); 2821 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2822 EmitOMPReductionClauseInit(S, LoopScope); 2823 EmitOMPPrivateLoopCounters(S, LoopScope); 2824 EmitOMPLinearClause(S, LoopScope); 2825 (void)LoopScope.Privatize(); 2826 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 2827 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); 2828 2829 // Detect the loop schedule kind and chunk. 
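// Illustrative mapping (an assumption for exposition): 'schedule(dynamic,
// 4)' yields OMPC_SCHEDULE_dynamic with a chunk value of 4, while the
// absence of a schedule clause queries the runtime default below
// (typically static and unchunked).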
2830 const Expr *ChunkExpr = nullptr; 2831 OpenMPScheduleTy ScheduleKind; 2832 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { 2833 ScheduleKind.Schedule = C->getScheduleKind(); 2834 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2835 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2836 ChunkExpr = C->getChunkSize(); 2837 } else { 2838 // Default behaviour for schedule clause. 2839 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk( 2840 *this, S, ScheduleKind.Schedule, ChunkExpr); 2841 } 2842 bool HasChunkSizeOne = false; 2843 llvm::Value *Chunk = nullptr; 2844 if (ChunkExpr) { 2845 Chunk = EmitScalarExpr(ChunkExpr); 2846 Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(), 2847 S.getIterationVariable()->getType(), 2848 S.getBeginLoc()); 2849 Expr::EvalResult Result; 2850 if (ChunkExpr->EvaluateAsInt(Result, getContext())) { 2851 llvm::APSInt EvaluatedChunk = Result.Val.getInt(); 2852 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1); 2853 } 2854 } 2855 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2856 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2857 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2858 // If the static schedule kind is specified or if the ordered clause is 2859 // specified, and if no monotonic modifier is specified, the effect will 2860 // be as if the monotonic modifier was specified. 2861 bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, 2862 /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && 2863 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); 2864 if ((RT.isStaticNonchunked(ScheduleKind.Schedule, 2865 /* Chunked */ Chunk != nullptr) || 2866 StaticChunkedOne) && 2867 !Ordered) { 2868 JumpDest LoopExit = 2869 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2870 emitCommonSimdLoop( 2871 *this, S, 2872 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2873 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2874 CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2875 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { 2876 if (C->getKind() == OMPC_ORDER_concurrent) 2877 CGF.LoopStack.setParallel(/*Enable=*/true); 2878 } 2879 }, 2880 [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk, 2881 &S, ScheduleKind, LoopExit, 2882 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 2883 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2884 // When no chunk_size is specified, the iteration space is divided 2885 // into chunks that are approximately equal in size, and at most 2886 // one chunk is distributed to each thread. Note that the size of 2887 // the chunks is unspecified in this case. 2888 CGOpenMPRuntime::StaticRTInput StaticInit( 2889 IVSize, IVSigned, Ordered, IL.getAddress(CGF), 2890 LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF), 2891 StaticChunkedOne ? 
Chunk : nullptr); 2892 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 2893 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, 2894 StaticInit); 2895 // UB = min(UB, GlobalUB); 2896 if (!StaticChunkedOne) 2897 CGF.EmitIgnoredExpr(S.getEnsureUpperBound()); 2898 // IV = LB; 2899 CGF.EmitIgnoredExpr(S.getInit()); 2900 // For unchunked static schedule generate: 2901 // 2902 // while (idx <= UB) { 2903 // BODY; 2904 // ++idx; 2905 // } 2906 // 2907 // For static schedule with chunk one: 2908 // 2909 // while (IV <= PrevUB) { 2910 // BODY; 2911 // IV += ST; 2912 // } 2913 CGF.EmitOMPInnerLoop( 2914 S, LoopScope.requiresCleanups(), 2915 StaticChunkedOne ? S.getCombinedParForInDistCond() 2916 : S.getCond(), 2917 StaticChunkedOne ? S.getDistInc() : S.getInc(), 2918 [&S, LoopExit](CodeGenFunction &CGF) { 2919 emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit); 2920 }, 2921 [](CodeGenFunction &) {}); 2922 }); 2923 EmitBlock(LoopExit.getBlock()); 2924 // Tell the runtime we are done. 2925 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2926 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 2927 S.getDirectiveKind()); 2928 }; 2929 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2930 } else { 2931 const bool IsMonotonic = 2932 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || 2933 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || 2934 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 2935 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 2936 // Emit the outer loop, which requests its work chunk [LB..UB] from 2937 // runtime and runs the inner loop to process it. 2938 const OMPLoopArguments LoopArguments( 2939 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), 2940 IL.getAddress(*this), Chunk, EUB); 2941 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 2942 LoopArguments, CGDispatchBounds); 2943 } 2944 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2945 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { 2946 return CGF.Builder.CreateIsNotNull( 2947 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 2948 }); 2949 } 2950 EmitOMPReductionClauseFinal( 2951 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 2952 ? /*Parallel and Simd*/ OMPD_parallel_for_simd 2953 : /*Parallel only*/ OMPD_parallel); 2954 // Emit post-update of the reduction variables if IsLastIter != 0. 2955 emitPostUpdateForReductionClause( 2956 *this, S, [IL, &S](CodeGenFunction &CGF) { 2957 return CGF.Builder.CreateIsNotNull( 2958 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 2959 }); 2960 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2961 if (HasLastprivateClause) 2962 EmitOMPLastprivateClauseFinal( 2963 S, isOpenMPSimdDirective(S.getDirectiveKind()), 2964 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); 2965 } 2966 EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) { 2967 return CGF.Builder.CreateIsNotNull( 2968 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 2969 }); 2970 DoacrossCleanupScope.ForceCleanup(); 2971 // We're now done with the loop, so jump to the continuation block. 2972 if (ContBlock) { 2973 EmitBranch(ContBlock); 2974 EmitBlock(ContBlock, /*IsFinished=*/true); 2975 } 2976 } 2977 return HasLastprivateClause; 2978 } 2979 2980 /// The following two functions generate expressions for the loop lower 2981 /// and upper bounds in case of static and dynamic (dispatch) schedule 2982 /// of the associated 'for' or 'distribute' loop. 
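/// As an informal sketch (assuming the usual kmpc worksharing entry points),
/// the statically scheduled case is lowered roughly as:
/// \code
/// lb = 0; ub = last_iteration; stride = 1; is_last = 0;
/// __kmpc_for_static_init(..., &is_last, &lb, &ub, &stride, ...);
/// for (iv = lb; iv <= min(ub, last_iteration); ++iv)
///   <body>;
/// __kmpc_for_static_fini(...);
/// \endcode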
2983 static std::pair<LValue, LValue>
2984 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2985   const auto &LS = cast<OMPLoopDirective>(S);
2986   LValue LB =
2987       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2988   LValue UB =
2989       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2990   return {LB, UB};
2991 }
2992
2993 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2994 /// consider the lower and upper bound expressions generated by the
2995 /// worksharing loop support, but instead use 0 and the iteration space size
2996 /// as constants.
2997 static std::pair<llvm::Value *, llvm::Value *>
2998 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2999                           Address LB, Address UB) {
3000   const auto &LS = cast<OMPLoopDirective>(S);
3001   const Expr *IVExpr = LS.getIterationVariable();
3002   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
3003   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
3004   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
3005   return {LBVal, UBVal};
3006 }
3007
3008 /// Emits the code for the directive with inscan reductions.
3009 /// The code is the following:
3010 /// \code
3011 /// size num_iters = <num_iters>;
3012 /// <type> buffer[num_iters];
3013 /// #pragma omp ...
3014 /// for (i: 0..<num_iters>) {
3015 ///   <input phase>;
3016 ///   buffer[i] = red;
3017 /// }
3018 /// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3019 /// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
3020 ///   buffer[cnt] op= buffer[cnt-pow(2,k)];
3021 /// #pragma omp ...
3022 /// for (0..<num_iters>) {
3023 ///   red = InclusiveScan ? buffer[i] : buffer[i-1];
3024 ///   <scan phase>;
3025 /// }
3026 /// \endcode
3027 static void emitScanBasedDirective(
3028     CodeGenFunction &CGF, const OMPLoopDirective &S,
3029     llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
3030     llvm::function_ref<void(CodeGenFunction &)> FirstGen,
3031     llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
3032   llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3033       NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3034   SmallVector<const Expr *, 4> Shareds;
3035   SmallVector<const Expr *, 4> Privates;
3036   SmallVector<const Expr *, 4> ReductionOps;
3037   SmallVector<const Expr *, 4> LHSs;
3038   SmallVector<const Expr *, 4> RHSs;
3039   SmallVector<const Expr *, 4> CopyOps;
3040   SmallVector<const Expr *, 4> CopyArrayTemps;
3041   SmallVector<const Expr *, 4> CopyArrayElems;
3042   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3043     assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3044            "Only inscan reductions are expected.");
3045     Shareds.append(C->varlist_begin(), C->varlist_end());
3046     Privates.append(C->privates().begin(), C->privates().end());
3047     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3048     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3049     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3050     CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
3051     CopyArrayTemps.append(C->copy_array_temps().begin(),
3052                           C->copy_array_temps().end());
3053     CopyArrayElems.append(C->copy_array_elems().begin(),
3054                           C->copy_array_elems().end());
3055   }
3056   {
3057     // Emit buffers for each reduction variable.
3058     // ReductionCodeGen is required to emit the code for array reductions
3059     // correctly.
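    // A sketch of what the loop below materializes (illustrative, assuming a
    // clause such as 'reduction(inscan, +: x)'): Sema has created one VLA
    // temporary per reduction item whose size expression is an
    // OpaqueValueExpr; the DimMapping below binds that opaque size to the
    // actual iteration count, so the emitted declaration behaves like
    //   <type> buffer[num_iters];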
3060     ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
3061     unsigned Count = 0;
3062     auto *ITA = CopyArrayTemps.begin();
3063     for (const Expr *IRef : Privates) {
3064       const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
3065       // Emit variably modified arrays, used for array/array-section
3066       // reductions.
3067       if (PrivateVD->getType()->isVariablyModifiedType()) {
3068         RedCG.emitSharedOrigLValue(CGF, Count);
3069         RedCG.emitAggregateType(CGF, Count);
3070       }
3071       CodeGenFunction::OpaqueValueMapping DimMapping(
3072           CGF,
3073           cast<OpaqueValueExpr>(
3074               cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
3075                   ->getSizeExpr()),
3076           RValue::get(OMPScanNumIterations));
3077       // Emit temp buffer.
3078       CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
3079       ++ITA;
3080       ++Count;
3081     }
3082   }
3083   CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3084   {
3085     // Emit loop with input phase:
3086     // #pragma omp ...
3087     // for (i: 0..<num_iters>) {
3088     //   <input phase>;
3089     //   buffer[i] = red;
3090     // }
3091     CGF.OMPFirstScanLoop = true;
3092     CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3093     FirstGen(CGF);
3094   }
3095   // Emit prefix reduction:
3096   // for (int k = 0; k != ceil(log2(n)); ++k)
3097   llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
3098   llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
3099   llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
3100   llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
3101   llvm::Value *Arg =
3102       CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
3103   llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
3104   F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
3105   LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
3106   LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
3107   llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
3108       OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
3109   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
3110   CGF.EmitBlock(LoopBB);
3111   auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
3112   // size pow2k = 1;
3113   auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3114   Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
3115   Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
3116   // for (size i = n - 1; i >= 2 ^ k; --i)
3117   //   tmp[i] op= tmp[i-pow2k];
3118   llvm::BasicBlock *InnerLoopBB =
3119       CGF.createBasicBlock("omp.inner.log.scan.body");
3120   llvm::BasicBlock *InnerExitBB =
3121       CGF.createBasicBlock("omp.inner.log.scan.exit");
3122   llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
3123   CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3124   CGF.EmitBlock(InnerLoopBB);
3125   auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3126   IVal->addIncoming(NMin1, LoopBB);
3127   {
3128     CodeGenFunction::OMPPrivateScope PrivScope(CGF);
3129     auto *ILHS = LHSs.begin();
3130     auto *IRHS = RHSs.begin();
3131     for (const Expr *CopyArrayElem : CopyArrayElems) {
3132       const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3133       const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3134       Address LHSAddr = Address::invalid();
3135       {
3136         CodeGenFunction::OpaqueValueMapping IdxMapping(
3137             CGF,
3138             cast<OpaqueValueExpr>(
3139                 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3140             RValue::get(IVal));
3141         LHSAddr =
CGF.EmitLValue(CopyArrayElem).getAddress(CGF); 3142 } 3143 PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; }); 3144 Address RHSAddr = Address::invalid(); 3145 { 3146 llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K); 3147 CodeGenFunction::OpaqueValueMapping IdxMapping( 3148 CGF, 3149 cast<OpaqueValueExpr>( 3150 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), 3151 RValue::get(OffsetIVal)); 3152 RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); 3153 } 3154 PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; }); 3155 ++ILHS; 3156 ++IRHS; 3157 } 3158 PrivScope.Privatize(); 3159 CGF.CGM.getOpenMPRuntime().emitReduction( 3160 CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, 3161 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown}); 3162 } 3163 llvm::Value *NextIVal = 3164 CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1)); 3165 IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock()); 3166 CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K); 3167 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); 3168 CGF.EmitBlock(InnerExitBB); 3169 llvm::Value *Next = 3170 CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1)); 3171 Counter->addIncoming(Next, CGF.Builder.GetInsertBlock()); 3172 // pow2k <<= 1; 3173 llvm::Value *NextPow2K = CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true); 3174 Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock()); 3175 llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal); 3176 CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB); 3177 auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc()); 3178 CGF.EmitBlock(ExitBB); 3179 3180 CGF.OMPFirstScanLoop = false; 3181 SecondGen(CGF); 3182 } 3183 3184 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 3185 bool HasLastprivates = false; 3186 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 3187 PrePostActionTy &) { 3188 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), 3189 [](const OMPReductionClause *C) { 3190 return C->getModifier() == OMPC_REDUCTION_inscan; 3191 })) { 3192 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { 3193 OMPLocalDeclMapRAII Scope(CGF); 3194 OMPLoopScope LoopScope(CGF, S); 3195 return CGF.EmitScalarExpr(S.getNumIterations()); 3196 }; 3197 const auto &&FirstGen = [&S](CodeGenFunction &CGF) { 3198 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 3199 (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 3200 emitForLoopBounds, 3201 emitDispatchForLoopBounds); 3202 // Emit an implicit barrier at the end. 
3203 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(), 3204 OMPD_for); 3205 }; 3206 const auto &&SecondGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { 3207 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 3208 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 3209 emitForLoopBounds, 3210 emitDispatchForLoopBounds); 3211 }; 3212 emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen); 3213 } else { 3214 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 3215 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 3216 emitForLoopBounds, 3217 emitDispatchForLoopBounds); 3218 } 3219 }; 3220 { 3221 auto LPCRegion = 3222 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 3223 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3224 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 3225 S.hasCancel()); 3226 } 3227 3228 // Emit an implicit barrier at the end. 3229 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) 3230 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); 3231 // Check for outer lastprivate conditional update. 3232 checkForLastprivateConditionalUpdate(*this, S); 3233 } 3234 3235 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 3236 bool HasLastprivates = false; 3237 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 3238 PrePostActionTy &) { 3239 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 3240 emitForLoopBounds, 3241 emitDispatchForLoopBounds); 3242 }; 3243 { 3244 auto LPCRegion = 3245 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 3246 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3247 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 3248 } 3249 3250 // Emit an implicit barrier at the end. 3251 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) 3252 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); 3253 // Check for outer lastprivate conditional update. 3254 checkForLastprivateConditionalUpdate(*this, S); 3255 } 3256 3257 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 3258 const Twine &Name, 3259 llvm::Value *Init = nullptr) { 3260 LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 3261 if (Init) 3262 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); 3263 return LVal; 3264 } 3265 3266 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 3267 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); 3268 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); 3269 bool HasLastprivates = false; 3270 auto &&CodeGen = [&S, CapturedStmt, CS, 3271 &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { 3272 const ASTContext &C = CGF.getContext(); 3273 QualType KmpInt32Ty = 3274 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3275 // Emit helper vars inits. 3276 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 3277 CGF.Builder.getInt32(0)); 3278 llvm::ConstantInt *GlobalUBVal = CS != nullptr 3279 ? 
CGF.Builder.getInt32(CS->size() - 1) 3280 : CGF.Builder.getInt32(0); 3281 LValue UB = 3282 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 3283 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 3284 CGF.Builder.getInt32(1)); 3285 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 3286 CGF.Builder.getInt32(0)); 3287 // Loop counter. 3288 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 3289 OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 3290 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 3291 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 3292 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 3293 // Generate condition for loop. 3294 BinaryOperator *Cond = BinaryOperator::Create( 3295 C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary, 3296 S.getBeginLoc(), FPOptions(C.getLangOpts())); 3297 // Increment for loop counter. 3298 UnaryOperator *Inc = UnaryOperator::Create( 3299 C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 3300 S.getBeginLoc(), true, FPOptions(C.getLangOpts())); 3301 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { 3302 // Iterate through all sections and emit a switch construct: 3303 // switch (IV) { 3304 // case 0: 3305 // <SectionStmt[0]>; 3306 // break; 3307 // ... 3308 // case <NumSection> - 1: 3309 // <SectionStmt[<NumSection> - 1]>; 3310 // break; 3311 // } 3312 // .omp.sections.exit: 3313 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 3314 llvm::SwitchInst *SwitchStmt = 3315 CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()), 3316 ExitBB, CS == nullptr ? 1 : CS->size()); 3317 if (CS) { 3318 unsigned CaseNumber = 0; 3319 for (const Stmt *SubStmt : CS->children()) { 3320 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 3321 CGF.EmitBlock(CaseBB); 3322 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 3323 CGF.EmitStmt(SubStmt); 3324 CGF.EmitBranch(ExitBB); 3325 ++CaseNumber; 3326 } 3327 } else { 3328 llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case"); 3329 CGF.EmitBlock(CaseBB); 3330 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 3331 CGF.EmitStmt(CapturedStmt); 3332 CGF.EmitBranch(ExitBB); 3333 } 3334 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 3335 }; 3336 3337 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 3338 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 3339 // Emit implicit barrier to synchronize threads and avoid data races on 3340 // initialization of firstprivate variables and post-update of lastprivate 3341 // variables. 3342 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 3343 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 3344 /*ForceSimpleCall=*/true); 3345 } 3346 CGF.EmitOMPPrivateClause(S, LoopScope); 3347 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV); 3348 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 3349 CGF.EmitOMPReductionClauseInit(S, LoopScope); 3350 (void)LoopScope.Privatize(); 3351 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 3352 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 3353 3354 // Emit static non-chunked loop. 
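    // Conceptually (an illustrative sketch), a construct such as
    //   #pragma omp sections
    //   { { S0; } #pragma omp section { S1; } }
    // becomes a statically scheduled loop over the section index:
    //   for (iv = lb; iv <= ub; ++iv)
    //     switch (iv) { case 0: S0; break; case 1: S1; break; }
    // with [lb, ub] handed out by the static-init runtime call below.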
3355     OpenMPScheduleTy ScheduleKind;
3356     ScheduleKind.Schedule = OMPC_SCHEDULE_static;
3357     CGOpenMPRuntime::StaticRTInput StaticInit(
3358         /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
3359         LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
3360     CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3361         CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
3362     // UB = min(UB, GlobalUB);
3363     llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
3364     llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
3365         CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
3366     CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
3367     // IV = LB;
3368     CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
3369     // while (idx <= UB) { BODY; ++idx; }
3370     CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
3371                          [](CodeGenFunction &) {});
3372     // Tell the runtime we are done.
3373     auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3374       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3375                                                      S.getDirectiveKind());
3376     };
3377     CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
3378     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
3379     // Emit post-update of the reduction variables if IsLastIter != 0.
3380     emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
3381       return CGF.Builder.CreateIsNotNull(
3382           CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3383     });
3384
3385     // Emit final copy of the lastprivate variables if IsLastIter != 0.
3386     if (HasLastprivates)
3387       CGF.EmitOMPLastprivateClauseFinal(
3388           S, /*NoFinals=*/false,
3389           CGF.Builder.CreateIsNotNull(
3390               CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
3391   };
3392
3393   bool HasCancel = false;
3394   if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
3395     HasCancel = OSD->hasCancel();
3396   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
3397     HasCancel = OPSD->hasCancel();
3398   OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
3399   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
3400                                               HasCancel);
3401   // Emit barrier for lastprivates only if the 'sections' directive has a
3402   // 'nowait' clause. Otherwise the barrier will be generated by the codegen
3403   // for the directive.
3404   if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
3405     // Emit implicit barrier to synchronize threads and avoid data races on
3406     // the final copy-out of lastprivate variables.
3407     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3408                                            OMPD_unknown);
3409   }
3410 }
3411
3412 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
3413   {
3414     auto LPCRegion =
3415         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3416     OMPLexicalScope Scope(*this, S, OMPD_unknown);
3417     EmitSections(S);
3418   }
3419   // Emit an implicit barrier at the end.
3420   if (!S.getSingleClause<OMPNowaitClause>()) {
3421     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3422                                            OMPD_sections);
3423   }
3424   // Check for outer lastprivate conditional update.
3425 checkForLastprivateConditionalUpdate(*this, S); 3426 } 3427 3428 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 3429 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3430 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 3431 }; 3432 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3433 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 3434 S.hasCancel()); 3435 } 3436 3437 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 3438 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 3439 llvm::SmallVector<const Expr *, 8> DestExprs; 3440 llvm::SmallVector<const Expr *, 8> SrcExprs; 3441 llvm::SmallVector<const Expr *, 8> AssignmentOps; 3442 // Check if there are any 'copyprivate' clauses associated with this 3443 // 'single' construct. 3444 // Build a list of copyprivate variables along with helper expressions 3445 // (<source>, <destination>, <destination>=<source> expressions) 3446 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 3447 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 3448 DestExprs.append(C->destination_exprs().begin(), 3449 C->destination_exprs().end()); 3450 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 3451 AssignmentOps.append(C->assignment_ops().begin(), 3452 C->assignment_ops().end()); 3453 } 3454 // Emit code for 'single' region along with 'copyprivate' clauses 3455 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3456 Action.Enter(CGF); 3457 OMPPrivateScope SingleScope(CGF); 3458 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 3459 CGF.EmitOMPPrivateClause(S, SingleScope); 3460 (void)SingleScope.Privatize(); 3461 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 3462 }; 3463 { 3464 auto LPCRegion = 3465 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 3466 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3467 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(), 3468 CopyprivateVars, DestExprs, 3469 SrcExprs, AssignmentOps); 3470 } 3471 // Emit an implicit barrier at the end (to avoid data race on firstprivate 3472 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 3473 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 3474 CGM.getOpenMPRuntime().emitBarrierCall( 3475 *this, S.getBeginLoc(), 3476 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); 3477 } 3478 // Check for outer lastprivate conditional update. 
3479 checkForLastprivateConditionalUpdate(*this, S); 3480 } 3481 3482 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 3483 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3484 Action.Enter(CGF); 3485 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 3486 }; 3487 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); 3488 } 3489 3490 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 3491 if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { 3492 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 3493 3494 const CapturedStmt *CS = S.getInnermostCapturedStmt(); 3495 const Stmt *MasterRegionBodyStmt = CS->getCapturedStmt(); 3496 3497 auto FiniCB = [this](InsertPointTy IP) { 3498 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 3499 }; 3500 3501 auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP, 3502 InsertPointTy CodeGenIP, 3503 llvm::BasicBlock &FiniBB) { 3504 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); 3505 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt, 3506 CodeGenIP, FiniBB); 3507 }; 3508 3509 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); 3510 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); 3511 Builder.restoreIP(OMPBuilder->CreateMaster(Builder, BodyGenCB, FiniCB)); 3512 3513 return; 3514 } 3515 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3516 emitMaster(*this, S); 3517 } 3518 3519 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 3520 if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { 3521 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 3522 3523 const CapturedStmt *CS = S.getInnermostCapturedStmt(); 3524 const Stmt *CriticalRegionBodyStmt = CS->getCapturedStmt(); 3525 const Expr *Hint = nullptr; 3526 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) 3527 Hint = HintClause->getHint(); 3528 3529 // TODO: This is slightly different from what's currently being done in 3530 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything 3531 // about typing is final. 
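    // For example (a sketch): given
    //   #pragma omp critical (mylock) hint(2)
    // the hint expression '2' is evaluated and cast to i32 below, then passed
    // to the OpenMPIRBuilder so the runtime can create the named lock with
    // that hint.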
3532 llvm::Value *HintInst = nullptr; 3533 if (Hint) 3534 HintInst = 3535 Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false); 3536 3537 auto FiniCB = [this](InsertPointTy IP) { 3538 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 3539 }; 3540 3541 auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP, 3542 InsertPointTy CodeGenIP, 3543 llvm::BasicBlock &FiniBB) { 3544 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); 3545 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt, 3546 CodeGenIP, FiniBB); 3547 }; 3548 3549 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); 3550 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); 3551 Builder.restoreIP(OMPBuilder->CreateCritical( 3552 Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(), 3553 HintInst)); 3554 3555 return; 3556 } 3557 3558 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3559 Action.Enter(CGF); 3560 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 3561 }; 3562 const Expr *Hint = nullptr; 3563 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) 3564 Hint = HintClause->getHint(); 3565 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3566 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 3567 S.getDirectiveName().getAsString(), 3568 CodeGen, S.getBeginLoc(), Hint); 3569 } 3570 3571 void CodeGenFunction::EmitOMPParallelForDirective( 3572 const OMPParallelForDirective &S) { 3573 // Emit directive as a combined directive that consists of two implicit 3574 // directives: 'parallel' with 'for' directive. 3575 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3576 Action.Enter(CGF); 3577 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); 3578 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 3579 emitDispatchForLoopBounds); 3580 }; 3581 { 3582 auto LPCRegion = 3583 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 3584 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, 3585 emitEmptyBoundParameters); 3586 } 3587 // Check for outer lastprivate conditional update. 3588 checkForLastprivateConditionalUpdate(*this, S); 3589 } 3590 3591 void CodeGenFunction::EmitOMPParallelForSimdDirective( 3592 const OMPParallelForSimdDirective &S) { 3593 // Emit directive as a combined directive that consists of two implicit 3594 // directives: 'parallel' with 'for' directive. 3595 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3596 Action.Enter(CGF); 3597 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 3598 emitDispatchForLoopBounds); 3599 }; 3600 { 3601 auto LPCRegion = 3602 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 3603 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, 3604 emitEmptyBoundParameters); 3605 } 3606 // Check for outer lastprivate conditional update. 3607 checkForLastprivateConditionalUpdate(*this, S); 3608 } 3609 3610 void CodeGenFunction::EmitOMPParallelMasterDirective( 3611 const OMPParallelMasterDirective &S) { 3612 // Emit directive as a combined directive that consists of two implicit 3613 // directives: 'parallel' with 'master' directive. 
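  // Informally (a sketch of the equivalence, not a normative rewrite):
  //   #pragma omp parallel master [clauses]
  //   <body>
  // behaves like
  //   #pragma omp parallel [clauses]
  //   { #pragma omp master { <body> } }
  // with copyin/firstprivate/private/reduction handled on the parallel part.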
3614   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3615     Action.Enter(CGF);
3616     OMPPrivateScope PrivateScope(CGF);
3617     bool Copyins = CGF.EmitOMPCopyinClause(S);
3618     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3619     if (Copyins) {
3620       // Emit implicit barrier to synchronize threads and avoid data races
3621       // on propagation of the master thread's values of threadprivate
3622       // variables to the local instances in all other implicit threads.
3623       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3624           CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3625           /*ForceSimpleCall=*/true);
3626     }
3627     CGF.EmitOMPPrivateClause(S, PrivateScope);
3628     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3629     (void)PrivateScope.Privatize();
3630     emitMaster(CGF, S);
3631     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
3632   };
3633   {
3634     auto LPCRegion =
3635         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3636     emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
3637                                    emitEmptyBoundParameters);
3638     emitPostUpdateForReductionClause(*this, S,
3639                                      [](CodeGenFunction &) { return nullptr; });
3640   }
3641   // Check for outer lastprivate conditional update.
3642   checkForLastprivateConditionalUpdate(*this, S);
3643 }
3644
3645 void CodeGenFunction::EmitOMPParallelSectionsDirective(
3646     const OMPParallelSectionsDirective &S) {
3647   // Emit directive as a combined directive that consists of two implicit
3648   // directives: 'parallel' with 'sections' directive.
3649   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3650     Action.Enter(CGF);
3651     CGF.EmitSections(S);
3652   };
3653   {
3654     auto LPCRegion =
3655         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3656     emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
3657                                    emitEmptyBoundParameters);
3658   }
3659   // Check for outer lastprivate conditional update.
3660   checkForLastprivateConditionalUpdate(*this, S);
3661 }
3662
3663 void CodeGenFunction::EmitOMPTaskBasedDirective(
3664     const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
3665     const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
3666     OMPTaskDataTy &Data) {
3667   // Emit outlined function for task construct.
3668   const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
3669   auto I = CS->getCapturedDecl()->param_begin();
3670   auto PartId = std::next(I);
3671   auto TaskT = std::next(I, 4);
3672   // Check if the task is final.
3673   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
3674     // If the condition constant folds and can be elided, try to avoid emitting
3675     // the condition and the dead arm of the if/else.
3676     const Expr *Cond = Clause->getCondition();
3677     bool CondConstant;
3678     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
3679       Data.Final.setInt(CondConstant);
3680     else
3681       Data.Final.setPointer(EvaluateExprAsBool(Cond));
3682   } else {
3683     // By default the task is not final.
3684     Data.Final.setInt(/*IntVal=*/false);
3685   }
3686   // Check if the task has a 'priority' clause.
3687 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 3688 const Expr *Prio = Clause->getPriority(); 3689 Data.Priority.setInt(/*IntVal=*/true); 3690 Data.Priority.setPointer(EmitScalarConversion( 3691 EmitScalarExpr(Prio), Prio->getType(), 3692 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 3693 Prio->getExprLoc())); 3694 } 3695 // The first function argument for tasks is a thread id, the second one is a 3696 // part id (0 for tied tasks, >=0 for untied task). 3697 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 3698 // Get list of private variables. 3699 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 3700 auto IRef = C->varlist_begin(); 3701 for (const Expr *IInit : C->private_copies()) { 3702 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 3703 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 3704 Data.PrivateVars.push_back(*IRef); 3705 Data.PrivateCopies.push_back(IInit); 3706 } 3707 ++IRef; 3708 } 3709 } 3710 EmittedAsPrivate.clear(); 3711 // Get list of firstprivate variables. 3712 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 3713 auto IRef = C->varlist_begin(); 3714 auto IElemInitRef = C->inits().begin(); 3715 for (const Expr *IInit : C->private_copies()) { 3716 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 3717 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 3718 Data.FirstprivateVars.push_back(*IRef); 3719 Data.FirstprivateCopies.push_back(IInit); 3720 Data.FirstprivateInits.push_back(*IElemInitRef); 3721 } 3722 ++IRef; 3723 ++IElemInitRef; 3724 } 3725 } 3726 // Get list of lastprivate variables (for taskloops). 3727 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 3728 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 3729 auto IRef = C->varlist_begin(); 3730 auto ID = C->destination_exprs().begin(); 3731 for (const Expr *IInit : C->private_copies()) { 3732 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 3733 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 3734 Data.LastprivateVars.push_back(*IRef); 3735 Data.LastprivateCopies.push_back(IInit); 3736 } 3737 LastprivateDstsOrigs.insert( 3738 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 3739 cast<DeclRefExpr>(*IRef)}); 3740 ++IRef; 3741 ++ID; 3742 } 3743 } 3744 SmallVector<const Expr *, 4> LHSs; 3745 SmallVector<const Expr *, 4> RHSs; 3746 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 3747 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); 3748 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); 3749 Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); 3750 Data.ReductionOps.append(C->reduction_ops().begin(), 3751 C->reduction_ops().end()); 3752 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 3753 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 3754 } 3755 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( 3756 *this, S.getBeginLoc(), LHSs, RHSs, Data); 3757 // Build list of dependences. 
3758 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { 3759 OMPTaskDataTy::DependData &DD = 3760 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); 3761 DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); 3762 } 3763 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, 3764 CapturedRegion](CodeGenFunction &CGF, 3765 PrePostActionTy &Action) { 3766 // Set proper addresses for generated private copies. 3767 OMPPrivateScope Scope(CGF); 3768 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs; 3769 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || 3770 !Data.LastprivateVars.empty()) { 3771 llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( 3772 CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); 3773 enum { PrivatesParam = 2, CopyFnParam = 3 }; 3774 llvm::Value *CopyFn = CGF.Builder.CreateLoad( 3775 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); 3776 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( 3777 CS->getCapturedDecl()->getParam(PrivatesParam))); 3778 // Map privates. 3779 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 3780 llvm::SmallVector<llvm::Value *, 16> CallArgs; 3781 CallArgs.push_back(PrivatesPtr); 3782 for (const Expr *E : Data.PrivateVars) { 3783 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3784 Address PrivatePtr = CGF.CreateMemTemp( 3785 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); 3786 PrivatePtrs.emplace_back(VD, PrivatePtr); 3787 CallArgs.push_back(PrivatePtr.getPointer()); 3788 } 3789 for (const Expr *E : Data.FirstprivateVars) { 3790 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3791 Address PrivatePtr = 3792 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 3793 ".firstpriv.ptr.addr"); 3794 PrivatePtrs.emplace_back(VD, PrivatePtr); 3795 FirstprivatePtrs.emplace_back(VD, PrivatePtr); 3796 CallArgs.push_back(PrivatePtr.getPointer()); 3797 } 3798 for (const Expr *E : Data.LastprivateVars) { 3799 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3800 Address PrivatePtr = 3801 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 3802 ".lastpriv.ptr.addr"); 3803 PrivatePtrs.emplace_back(VD, PrivatePtr); 3804 CallArgs.push_back(PrivatePtr.getPointer()); 3805 } 3806 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( 3807 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); 3808 for (const auto &Pair : LastprivateDstsOrigs) { 3809 const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); 3810 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD), 3811 /*RefersToEnclosingVariableOrCapture=*/ 3812 CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, 3813 Pair.second->getType(), VK_LValue, 3814 Pair.second->getExprLoc()); 3815 Scope.addPrivate(Pair.first, [&CGF, &DRE]() { 3816 return CGF.EmitLValue(&DRE).getAddress(CGF); 3817 }); 3818 } 3819 for (const auto &Pair : PrivatePtrs) { 3820 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 3821 CGF.getContext().getDeclAlign(Pair.first)); 3822 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 3823 } 3824 } 3825 if (Data.Reductions) { 3826 OMPPrivateScope FirstprivateScope(CGF); 3827 for (const auto &Pair : FirstprivatePtrs) { 3828 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 3829 CGF.getContext().getDeclAlign(Pair.first)); 3830 FirstprivateScope.addPrivate(Pair.first, 3831 [Replacement]() { return Replacement; 
});
3832     }
3833     (void)FirstprivateScope.Privatize();
3834     OMPLexicalScope LexScope(CGF, S, CapturedRegion);
3835     ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
3836                            Data.ReductionCopies, Data.ReductionOps);
3837     llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
3838         CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
3839     for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
3840       RedCG.emitSharedOrigLValue(CGF, Cnt);
3841       RedCG.emitAggregateType(CGF, Cnt);
3842       // FIXME: This must be removed once the runtime library is fixed.
3843       // Emit required threadprivate variables for
3844       // initializer/combiner/finalizer.
3845       CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
3846                                                          RedCG, Cnt);
3847       Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
3848           CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
3849       Replacement =
3850           Address(CGF.EmitScalarConversion(
3851                       Replacement.getPointer(), CGF.getContext().VoidPtrTy,
3852                       CGF.getContext().getPointerType(
3853                           Data.ReductionCopies[Cnt]->getType()),
3854                       Data.ReductionCopies[Cnt]->getExprLoc()),
3855                   Replacement.getAlignment());
3856       Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
3857       Scope.addPrivate(RedCG.getBaseDecl(Cnt),
3858                        [Replacement]() { return Replacement; });
3859     }
3860   }
3861   // Privatize all private variables except for in_reduction items.
3862   (void)Scope.Privatize();
3863   SmallVector<const Expr *, 4> InRedVars;
3864   SmallVector<const Expr *, 4> InRedPrivs;
3865   SmallVector<const Expr *, 4> InRedOps;
3866   SmallVector<const Expr *, 4> TaskgroupDescriptors;
3867   for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
3868     auto IPriv = C->privates().begin();
3869     auto IRed = C->reduction_ops().begin();
3870     auto ITD = C->taskgroup_descriptors().begin();
3871     for (const Expr *Ref : C->varlists()) {
3872       InRedVars.emplace_back(Ref);
3873       InRedPrivs.emplace_back(*IPriv);
3874       InRedOps.emplace_back(*IRed);
3875       TaskgroupDescriptors.emplace_back(*ITD);
3876       std::advance(IPriv, 1);
3877       std::advance(IRed, 1);
3878       std::advance(ITD, 1);
3879     }
3880   }
3881   // Privatize in_reduction items here, because taskgroup descriptors must be
3882   // privatized earlier.
3883   OMPPrivateScope InRedScope(CGF);
3884   if (!InRedVars.empty()) {
3885     ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
3886     for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
3887       RedCG.emitSharedOrigLValue(CGF, Cnt);
3888       RedCG.emitAggregateType(CGF, Cnt);
3889       // The taskgroup descriptor variable is always implicitly firstprivate
3890       // and privatized already during processing of the firstprivates.
3891       // FIXME: This must be removed once the runtime library is fixed.
3892       // Emit required threadprivate variables for
3893       // initializer/combiner/finalizer.
3894 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), 3895 RedCG, Cnt); 3896 llvm::Value *ReductionsPtr; 3897 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { 3898 ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), 3899 TRExpr->getExprLoc()); 3900 } else { 3901 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3902 } 3903 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( 3904 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); 3905 Replacement = Address( 3906 CGF.EmitScalarConversion( 3907 Replacement.getPointer(), CGF.getContext().VoidPtrTy, 3908 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), 3909 InRedPrivs[Cnt]->getExprLoc()), 3910 Replacement.getAlignment()); 3911 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); 3912 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), 3913 [Replacement]() { return Replacement; }); 3914 } 3915 } 3916 (void)InRedScope.Privatize(); 3917 3918 Action.Enter(CGF); 3919 BodyGen(CGF); 3920 }; 3921 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 3922 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, 3923 Data.NumberOfParts); 3924 OMPLexicalScope Scope(*this, S, llvm::None, 3925 !isOpenMPParallelDirective(S.getDirectiveKind()) && 3926 !isOpenMPSimdDirective(S.getDirectiveKind())); 3927 TaskGen(*this, OutlinedFn, Data); 3928 } 3929 3930 static ImplicitParamDecl * 3931 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, 3932 QualType Ty, CapturedDecl *CD, 3933 SourceLocation Loc) { 3934 auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, 3935 ImplicitParamDecl::Other); 3936 auto *OrigRef = DeclRefExpr::Create( 3937 C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, 3938 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); 3939 auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, 3940 ImplicitParamDecl::Other); 3941 auto *PrivateRef = DeclRefExpr::Create( 3942 C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD, 3943 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); 3944 QualType ElemType = C.getBaseElementType(Ty); 3945 auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType, 3946 ImplicitParamDecl::Other); 3947 auto *InitRef = DeclRefExpr::Create( 3948 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, 3949 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); 3950 PrivateVD->setInitStyle(VarDecl::CInit); 3951 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, 3952 InitRef, /*BasePath=*/nullptr, 3953 VK_RValue)); 3954 Data.FirstprivateVars.emplace_back(OrigRef); 3955 Data.FirstprivateCopies.emplace_back(PrivateRef); 3956 Data.FirstprivateInits.emplace_back(InitRef); 3957 return OrigVD; 3958 } 3959 3960 void CodeGenFunction::EmitOMPTargetTaskBasedDirective( 3961 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, 3962 OMPTargetDataInfo &InputInfo) { 3963 // Emit outlined function for task construct. 3964 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); 3965 Address CapturedStruct = GenerateCapturedStmtArgument(*CS); 3966 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 3967 auto I = CS->getCapturedDecl()->param_begin(); 3968 auto PartId = std::next(I); 3969 auto TaskT = std::next(I, 4); 3970 OMPTaskDataTy Data; 3971 // The task is not final. 
3972 Data.Final.setInt(/*IntVal=*/false); 3973 // Get list of firstprivate variables. 3974 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 3975 auto IRef = C->varlist_begin(); 3976 auto IElemInitRef = C->inits().begin(); 3977 for (auto *IInit : C->private_copies()) { 3978 Data.FirstprivateVars.push_back(*IRef); 3979 Data.FirstprivateCopies.push_back(IInit); 3980 Data.FirstprivateInits.push_back(*IElemInitRef); 3981 ++IRef; 3982 ++IElemInitRef; 3983 } 3984 } 3985 OMPPrivateScope TargetScope(*this); 3986 VarDecl *BPVD = nullptr; 3987 VarDecl *PVD = nullptr; 3988 VarDecl *SVD = nullptr; 3989 if (InputInfo.NumberOfTargetItems > 0) { 3990 auto *CD = CapturedDecl::Create( 3991 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); 3992 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); 3993 QualType BaseAndPointersType = getContext().getConstantArrayType( 3994 getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal, 3995 /*IndexTypeQuals=*/0); 3996 BPVD = createImplicitFirstprivateForType( 3997 getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc()); 3998 PVD = createImplicitFirstprivateForType( 3999 getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc()); 4000 QualType SizesType = getContext().getConstantArrayType( 4001 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), 4002 ArrSize, nullptr, ArrayType::Normal, 4003 /*IndexTypeQuals=*/0); 4004 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, 4005 S.getBeginLoc()); 4006 TargetScope.addPrivate( 4007 BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; }); 4008 TargetScope.addPrivate(PVD, 4009 [&InputInfo]() { return InputInfo.PointersArray; }); 4010 TargetScope.addPrivate(SVD, 4011 [&InputInfo]() { return InputInfo.SizesArray; }); 4012 } 4013 (void)TargetScope.Privatize(); 4014 // Build list of dependences. 4015 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { 4016 OMPTaskDataTy::DependData &DD = 4017 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); 4018 DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); 4019 } 4020 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, 4021 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { 4022 // Set proper addresses for generated private copies. 4023 OMPPrivateScope Scope(CGF); 4024 if (!Data.FirstprivateVars.empty()) { 4025 llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( 4026 CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); 4027 enum { PrivatesParam = 2, CopyFnParam = 3 }; 4028 llvm::Value *CopyFn = CGF.Builder.CreateLoad( 4029 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); 4030 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( 4031 CS->getCapturedDecl()->getParam(PrivatesParam))); 4032 // Map privates. 
4033 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 4034 llvm::SmallVector<llvm::Value *, 16> CallArgs; 4035 CallArgs.push_back(PrivatesPtr); 4036 for (const Expr *E : Data.FirstprivateVars) { 4037 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4038 Address PrivatePtr = 4039 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 4040 ".firstpriv.ptr.addr"); 4041 PrivatePtrs.emplace_back(VD, PrivatePtr); 4042 CallArgs.push_back(PrivatePtr.getPointer()); 4043 } 4044 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( 4045 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); 4046 for (const auto &Pair : PrivatePtrs) { 4047 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 4048 CGF.getContext().getDeclAlign(Pair.first)); 4049 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 4050 } 4051 } 4052 // Privatize all private variables except for in_reduction items. 4053 (void)Scope.Privatize(); 4054 if (InputInfo.NumberOfTargetItems > 0) { 4055 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( 4056 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0); 4057 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( 4058 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0); 4059 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( 4060 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0); 4061 } 4062 4063 Action.Enter(CGF); 4064 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); 4065 BodyGen(CGF); 4066 }; 4067 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 4068 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, 4069 Data.NumberOfParts); 4070 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0); 4071 IntegerLiteral IfCond(getContext(), TrueOrFalse, 4072 getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 4073 SourceLocation()); 4074 4075 CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn, 4076 SharedsTy, CapturedStruct, &IfCond, Data); 4077 } 4078 4079 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 4080 // Emit outlined function for task construct. 4081 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); 4082 Address CapturedStruct = GenerateCapturedStmtArgument(*CS); 4083 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 4084 const Expr *IfCond = nullptr; 4085 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 4086 if (C->getNameModifier() == OMPD_unknown || 4087 C->getNameModifier() == OMPD_task) { 4088 IfCond = C->getCondition(); 4089 break; 4090 } 4091 } 4092 4093 OMPTaskDataTy Data; 4094 // Check if we should emit tied or untied task. 
4095 Data.Tied = !S.getSingleClause<OMPUntiedClause>(); 4096 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 4097 CGF.EmitStmt(CS->getCapturedStmt()); 4098 }; 4099 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 4100 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, 4101 const OMPTaskDataTy &Data) { 4102 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn, 4103 SharedsTy, CapturedStruct, IfCond, 4104 Data); 4105 }; 4106 auto LPCRegion = 4107 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4108 EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data); 4109 } 4110 4111 void CodeGenFunction::EmitOMPTaskyieldDirective( 4112 const OMPTaskyieldDirective &S) { 4113 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc()); 4114 } 4115 4116 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 4117 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier); 4118 } 4119 4120 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 4121 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc()); 4122 } 4123 4124 void CodeGenFunction::EmitOMPTaskgroupDirective( 4125 const OMPTaskgroupDirective &S) { 4126 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4127 Action.Enter(CGF); 4128 if (const Expr *E = S.getReductionRef()) { 4129 SmallVector<const Expr *, 4> LHSs; 4130 SmallVector<const Expr *, 4> RHSs; 4131 OMPTaskDataTy Data; 4132 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { 4133 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); 4134 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); 4135 Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); 4136 Data.ReductionOps.append(C->reduction_ops().begin(), 4137 C->reduction_ops().end()); 4138 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 4139 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 4140 } 4141 llvm::Value *ReductionDesc = 4142 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(), 4143 LHSs, RHSs, Data); 4144 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4145 CGF.EmitVarDecl(*VD); 4146 CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD), 4147 /*Volatile=*/false, E->getType()); 4148 } 4149 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 4150 }; 4151 OMPLexicalScope Scope(*this, S, OMPD_unknown); 4152 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc()); 4153 } 4154 4155 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 4156 llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>() 4157 ? 
llvm::AtomicOrdering::NotAtomic 4158 : llvm::AtomicOrdering::AcquireRelease; 4159 CGM.getOpenMPRuntime().emitFlush( 4160 *this, 4161 [&S]() -> ArrayRef<const Expr *> { 4162 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) 4163 return llvm::makeArrayRef(FlushClause->varlist_begin(), 4164 FlushClause->varlist_end()); 4165 return llvm::None; 4166 }(), 4167 S.getBeginLoc(), AO); 4168 } 4169 4170 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) { 4171 const auto *DO = S.getSingleClause<OMPDepobjClause>(); 4172 LValue DOLVal = EmitLValue(DO->getDepobj()); 4173 if (const auto *DC = S.getSingleClause<OMPDependClause>()) { 4174 OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(), 4175 DC->getModifier()); 4176 Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end()); 4177 Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause( 4178 *this, Dependencies, DC->getBeginLoc()); 4179 EmitStoreOfScalar(DepAddr.getPointer(), DOLVal); 4180 return; 4181 } 4182 if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) { 4183 CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc()); 4184 return; 4185 } 4186 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) { 4187 CGM.getOpenMPRuntime().emitUpdateClause( 4188 *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc()); 4189 return; 4190 } 4191 } 4192 4193 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) { 4194 // Do not emit code for non-simd directives in simd-only mode. 4195 if (getLangOpts().OpenMPSimd && !OMPParentLoopDirectiveForScan) 4196 return; 4197 const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan; 4198 SmallVector<const Expr *, 4> Shareds; 4199 SmallVector<const Expr *, 4> Privates; 4200 SmallVector<const Expr *, 4> LHSs; 4201 SmallVector<const Expr *, 4> RHSs; 4202 SmallVector<const Expr *, 4> CopyOps; 4203 SmallVector<const Expr *, 4> CopyArrayTemps; 4204 SmallVector<const Expr *, 4> CopyArrayElems; 4205 for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) { 4206 if (C->getModifier() != OMPC_REDUCTION_inscan) 4207 continue; 4208 Shareds.append(C->varlist_begin(), C->varlist_end()); 4209 Privates.append(C->privates().begin(), C->privates().end()); 4210 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 4211 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 4212 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); 4213 CopyArrayTemps.append(C->copy_array_temps().begin(), 4214 C->copy_array_temps().end()); 4215 CopyArrayElems.append(C->copy_array_elems().begin(), 4216 C->copy_array_elems().end()); 4217 } 4218 bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>(); 4219 if (!IsInclusive) { 4220 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); 4221 EmitBlock(OMPScanExitBlock); 4222 } 4223 if (OMPFirstScanLoop) { 4224 // Emit buffer[i] = red; at the end of the input phase. 
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue DestLVal = EmitLValue(CopyArrayElem);
      LValue SrcLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                  SrcLVal.getAddress(*this),
                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                  CopyOps[I]);
    }
  }
  EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  if (IsInclusive) {
    EmitBlock(OMPScanExitBlock);
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  }
  EmitBlock(OMPScanDispatch);
  if (!OMPFirstScanLoop) {
    // Emit red = buffer[i]; at the entrance to the scan phase.
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    llvm::BasicBlock *ExclusiveExitBB = nullptr;
    if (!IsInclusive) {
      llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
      ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
      llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
      Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
      EmitBlock(ContBB);
      // Use idx - 1 iteration for exclusive scan.
      IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
    }
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue SrcLVal = EmitLValue(CopyArrayElem);
      LValue DestLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                  SrcLVal.getAddress(*this),
                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                  CopyOps[I]);
    }
    if (!IsInclusive) {
      EmitBlock(ExclusiveExitBB);
    }
  }
  EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
                                               : OMPAfterScanBlock);
  EmitBlock(OMPAfterScanBlock);
}

void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., because it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    // Emit 'then' code.
    {
      // Emit helper vars inits.

      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind()))
        EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const Expr *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getBeginLoc());
        }
      } else {
        // Default behaviour for dist_schedule clause.
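        // (Illustrative: '#pragma omp distribute dist_schedule(static, 128)'
        // takes the branch above with ScheduleKind = static and Chunk = 128;
        // with no dist_schedule clause, the runtime-specific default chosen
        // here is used instead.)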
        CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
            *this, S, ScheduleKind, Chunk);
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned =
          IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      bool StaticChunked =
          RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr) ||
          StaticChunked) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            StaticChunked ? Chunk : nullptr);
        RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                    StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        const Expr *Cond =
            isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                ? S.getCombinedCond()
                : S.getCond();

        if (StaticChunked)
          Cond = S.getCombinedDistCond();

        // For static unchunked schedules generate:
        //
        //  1. For distribute alone, codegen
        //    while (idx <= UB) {
        //      BODY;
        //      ++idx;
        //    }
        //
        //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
        //    while (idx <= UB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      idx += ST;
        //    }
        //
        // For static chunked schedules generate:
        //
        //   while (IV <= GlobalUB) {
        //     <CodeGen rest of pragma>(LB, UB);
        //     LB += ST;
        //     UB += ST;
        //     UB = min(UB, GlobalUB);
        //     IV = LB;
        //   }
        //
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind()))
                CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true);
            },
            [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
             StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(), Cond, IncExpr,
                  [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                    CodeGenLoop(CGF, S, LoopExit);
                  },
                  [&S, StaticChunked](CodeGenFunction &CGF) {
                    if (StaticChunked) {
                      CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
                      CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedInit());
                    }
                  });
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
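        // (With the default libomp runtime this is expected to lower to a
        // __kmpc_for_static_fini call.)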
        RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            IL.getAddress(*this), Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind())) {
        EmitOMPReductionClauseFinal(S, OMPD_simd);
        // Emit post-update of the reduction variables if IsLastIter != 0.
        emitPostUpdateForReductionClause(
            *this, S, [IL, &S](CodeGenFunction &CGF) {
              return CGF.Builder.CreateIsNotNull(
                  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
            });
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}

void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S,
                                                   SourceLocation Loc) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
  Fn->setDoesNotRecurse();
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.getAssociatedStmt() &&
           "'ordered' construct with 'depend' clause must not have an "
           "associated statement.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      llvm::Function *OutlinedFn =
          emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
}

static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                                   DestType, Loc)
                        : CGF.EmitComplexToScalarConversion(
                              Val.getComplexVal(), SrcType, DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    llvm::Value *ScalarVal = CGF.EmitScalarConversion(
        Val.getScalarVal(), SrcType, DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg())
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  else
    CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
}

static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, LValue LVal,
                                   SourceLocation Loc) {
  if (LVal.isGlobalReg())
    return CGF.EmitLoadOfLValue(LVal, Loc);
  return CGF.EmitAtomicLoad(
      LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
      LVal.isVolatile());
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
  // OpenMP, 2.17.7, atomic Construct
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::Monotonic:
  case llvm::AtomicOrdering::Release:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
}

static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, const Expr *X,
                                   const Expr *E, SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for 'x' is simple, and atomics are supported for the given type on the
  // target platform.
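  // Illustrative example (hypothetical names): for 'int x' and
  //   #pragma omp atomic update
  //   x += v;
  // this is called with BO = BO_Add, and when the checks below pass the
  // update lowers to a single 'atomicrmw add' instruction with ordering AO.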
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress(CGF).getElementType())) ||
      !X.getAddress(CGF).getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress(CGF).getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  llvm::Value *Res =
      CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> CommonGen) {
  // Update expressions are allowed to have the following forms:
  //  x binop= expr; -> xrval binop expr;
  //  x++, ++x -> xrval + 1;
  //  x--, --x -> xrval - 1;
  //  x = x binop expr; -> xrval binop expr;
  //  x = expr Op x; -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
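      // (EmitAtomicUpdate is expected to emit a load/compute/cmpxchg retry
      // loop when the update cannot be expressed as a single atomicrmw.)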
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
                                    llvm::AtomicOrdering AO, const Expr *X,
                                    const Expr *E, const Expr *UE,
                                    bool IsXLHSInRHSPart, SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  //  x binop= expr; -> xrval binop expr;
  //  x++, ++x -> xrval + 1;
  //  x--, --x -> xrval - 1;
  //  x = x binop expr; -> xrval binop expr;
  //  x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
                                     llvm::AtomicOrdering AO,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    //  x binop= expr; -> xrval binop expr;
    //  x++, ++x -> xrval + 1;
    //  x--, --x -> xrval - 1;
    //  x = x binop expr; -> xrval binop expr;
    //  x = expr Op x; -> expr binop xrval;
    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
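        // (E.g. for a prefix capture 'v = ++x;' the captured value is the new
        // one, so the update expression is re-evaluated below with the old
        // 'x' returned by 'atomicrmw'.)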
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::AcquireRelease);
    break;
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              llvm::AtomicOrdering AO, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
    break;
  case OMPC_write:
    emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_allocator:
  case OMPC_allocate:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_acq_rel:
  case OMPC_acquire:
  case OMPC_release:
  case OMPC_relaxed:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_depobj:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_use_device_addr:
  case OMPC_is_device_ptr:
  case OMPC_unified_address:
  case OMPC_unified_shared_memory:
  case OMPC_reverse_offload:
  case OMPC_dynamic_allocators:
  case OMPC_atomic_default_mem_order:
  case OMPC_device_type:
  case OMPC_match:
  case OMPC_nontemporal:
  case OMPC_order:
  case OMPC_destroy:
  case OMPC_detach:
  case OMPC_inclusive:
  case OMPC_exclusive:
  case OMPC_uses_allocators:
  case OMPC_affinity:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
  bool MemOrderingSpecified = false;
  if (S.getSingleClause<OMPSeqCstClause>()) {
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcqRelClause>()) {
    AO = llvm::AtomicOrdering::AcquireRelease;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcquireClause>()) {
    AO = llvm::AtomicOrdering::Acquire;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPReleaseClause>()) {
    AO = llvm::AtomicOrdering::Release;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPRelaxedClause>()) {
    AO = llvm::AtomicOrdering::Monotonic;
    MemOrderingSpecified = true;
  }
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find the first clause (skip seq_cst|acq_rel|acquire|release|relaxed
    // clause, if it is first).
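    // (E.g. for '#pragma omp atomic capture seq_cst' this selects the
    // 'capture' clause.)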
    if (C->getClauseKind() != OMPC_seq_cst &&
        C->getClauseKind() != OMPC_acq_rel &&
        C->getClauseKind() != OMPC_acquire &&
        C->getClauseKind() != OMPC_release &&
        C->getClauseKind() != OMPC_relaxed) {
      Kind = C->getClauseKind();
      break;
    }
  }
  if (!MemOrderingSpecified) {
    llvm::AtomicOrdering DefaultOrder =
        CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
    if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
        DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
        (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
         Kind == OMPC_capture)) {
      AO = DefaultOrder;
    } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
      if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
        AO = llvm::AtomicOrdering::Release;
      } else if (Kind == OMPC_read) {
        assert(Kind == OMPC_read && "Unexpected atomic kind.");
        AO = llvm::AtomicOrdering::Acquire;
      }
    }
  }

  const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
  if (const auto *FE = dyn_cast<FullExpr>(CS))
    enterFullExpression(FE);
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const Stmt *C : Compound->body()) {
      if (const auto *FE = dyn_cast<FullExpr>(C))
        enterFullExpression(FE);
    }
  }

  auto &&CodeGen = [&S, Kind, AO, CS](CodeGenFunction &CGF,
                                      PrePostActionTy &) {
    CGF.EmitStopPoint(CS);
    emitOMPAtomicExpr(CGF, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
                      S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(),
                      S.getBeginLoc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // On device emit this construct as inlined code.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
        });
    return;
  }

  auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the (at most one) 'if' clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
      nullptr, OMPC_DEVICE_unknown);
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device.setPointerAndInt(C->getDevice(), C->getModifier());

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
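  // (E.g. an 'if(0)'-style constant-false condition, or an empty
  // -fopenmp-targets list, keeps the region from becoming an offload entry.)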
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  auto &&SizeEmitter =
      [IsOffloadEntry](CodeGenFunction &CGF,
                       const OMPLoopDirective &D) -> llvm::Value * {
    if (IsOffloadEntry) {
      OMPLoopScope LoopScope(CGF, D);
      // Emit calculation of the iterations count.
      llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
      NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
                                                /*isSigned=*/false);
      return NumIterations;
    }
    return nullptr;
  };
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        SizeEmitter);
}

static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsRegion(CodeGenFunction &CGF,
                                  PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {

  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
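  // Note the inside-out structure shared by the 'teams distribute*' emitters
  // below: the teams region runs an inlined 'distribute' emitter, which in
  // turn invokes the loop-body generator (and, for the combined 'parallel
  // for' variants, an inner parallel region).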
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
                              CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  // Emit SPMD target teams distribute parallel for region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  // Emit SPMD target teams distribute parallel for simd region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
    // TODO: This check is necessary as we only generate `omp parallel` through
    // the OpenMPIRBuilder for now.
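    // (Illustrative: '#pragma omp cancel parallel if(c)' takes this path when
    // the OpenMPIRBuilder is enabled, e.g. via -fopenmp-enable-irbuilder.)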
    if (S.getCancelRegion() == OMPD_parallel) {
      llvm::Value *IfCondition = nullptr;
      if (IfCond)
        IfCondition = EmitScalarExpr(IfCond,
                                     /*IgnoreResultAssign=*/true);
      return Builder.restoreIP(
          OMPBuilder->CreateCancel(Builder, IfCondition, S.getCancelRegion()));
    }
  }

  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
      Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}

void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (const Expr *PvtVarIt : C.private_copies()) {
    const auto *OrigVD =
        cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD,
                                                         InitAddrIt, InitVD,
                                                         PvtVD]() {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // that the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it will be initialized by the
      // declaration we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable has served its purpose in the emission
      // of the previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}

// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code
  // generation to deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers
      // if needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inlined scope, because changes to references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
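  // For example, in '#pragma omp target data map(tofrom: A) if(Cond)' the
  // condition decides at runtime whether a device data environment is
  // created for A at all.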
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit the directive as a combined directive that consists of two implicit
  // directives: 'parallel' and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit the directive as a combined directive that consists of two implicit
  // directives: 'parallel' and 'for simd'.
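  // For example, '#pragma omp target parallel for simd' lowers its loop once
  // as a worksharing loop and wraps it in an implicit 'parallel' region via
  // emitCommonOMPParallelDirective below.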
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

/// Map an OpenMP loop helper variable to the address of the corresponding
/// implicit parameter.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  const auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(VDecl,
                      [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
}

void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  Address CapturedStruct = Address::invalid();
  {
    OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
    CapturedStruct = GenerateCapturedStmtArgument(*CS);
  }
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    (void)CGF.EmitOMPLinearClauseInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit inits for the helper variables.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    {
      OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
      emitCommonSimdLoop(
          CGF, S,
          [&S](CodeGenFunction &CGF, PrePostActionTy &) {
            if (isOpenMPSimdDirective(S.getDirectiveKind()))
              CGF.EmitOMPSimdInit(S);
          },
          [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
            CGF.EmitOMPInnerLoop(
                S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                [&S](CodeGenFunction &CGF) {
                  emitOMPLoopBodyWithStopPoint(CGF, S,
                                               CodeGenFunction::JumpDest());
                },
                [](CodeGenFunction &) {});
          });
    }
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
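    // For example, with '#pragma omp taskloop lastprivate(X)' the runtime
    // sets the is-last-iteration flag for the task that executes the final
    // iterations; the flag is loaded and tested below so X is copied back
    // from the logically last iteration only.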
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getBeginLoc())));
    }
    CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
                               (*LIP)->getType(), S.getBeginLoc()));
    });
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
                      &Data](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getBeginLoc());
  }
}

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
    const OMPMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
    const OMPMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
    const OMPParallelMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
    const OMPParallelMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
    EmitOMPScanDirective(*SD);
    return;
  }
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    OMPPrivateScope GlobalsScope(CGF);
    if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
      // Capture global firstprivates to avoid crash.
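      // A global variable named in 'firstprivate' has no LocalDeclMap entry,
      // so its address is registered up front; e.g. a file-scope 'int G;'
      // used in '#pragma omp taskloop firstprivate(G)'.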
      for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
        for (const Expr *Ref : C->varlists()) {
          // Use dyn_cast here: cast<> would assert on a non-DeclRefExpr and
          // never return null, which would make the guard below dead code.
          const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
          if (!DRE)
            continue;
          const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
          if (!VD || VD->hasLocalStorage())
            continue;
          if (!CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(Ref);
            GlobalsScope.addPrivate(
                VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
          }
        }
      }
    }
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      (void)GlobalsScope.Privatize();
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const Expr *E : LD->counters()) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(E);
            GlobalsScope.addPrivate(
                VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
          }
          if (isa<OMPCapturedExprDecl>(VD)) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
          if (!C->getNumForLoops())
            continue;
          for (unsigned I = LD->getCollapsedNumber(),
                        E = C->getLoopNumIterations().size();
               I < E; ++I) {
            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
              // Emit only those that were not explicitly referenced in
              // clauses.
              if (!CGF.LocalDeclMap.count(VD))
                CGF.EmitVarDecl(*VD);
            }
          }
        }
      }
      (void)GlobalsScope.Privatize();
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
    OMPSimdLexicalScope Scope(*this, D);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        *this,
        isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                    : D.getDirectiveKind(),
        CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, D);
}