//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/AtomicOrdering.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
          return CGF.EmitLValue(&DRE).getAddress(CGF);
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of expressions used in the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    CodeGenFunction::OMPMapVars PreCondVars;
    llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
    for (const auto *E : S.counters()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      EmittedAsPrivate.insert(VD->getCanonicalDecl());
      (void)PreCondVars.setVarAddr(
          CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
    }
    // Mark private vars as undefs.
    for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
      for (const Expr *IRef : C->varlists()) {
        const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
          (void)PreCondVars.setVarAddr(
              CGF, OrigVD,
              Address(llvm::UndefValue::get(
                          CGF.ConvertTypeForMem(CGF.getContext().getPointerType(
                              OrigVD->getType().getNonReferenceType()))),
                      CGF.getContext().getDeclAlign(OrigVD)));
        }
      }
    }
    (void)PreCondVars.apply(CGF);
    // Emit init, __range and __end variables for C++ range loops.
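    // For a range-based loop in the collapsed loop nest, e.g.
    //   #pragma omp for collapse(2)
    //   for (int I = 0; I < N; ++I)
    //     for (int X : Vec) { ... }
    // the implicit init statement and the '__range' and '__end' variables of
    // the inner loop have to be emitted up front so that the loop bounds can
    // be precomputed before the construct itself runs.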
    const Stmt *Body =
        S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
    for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) {
      Body = OMPLoopDirective::tryToFindNextInnerLoop(
          Body, /*TryImperfectlyNestedLoops=*/true);
      if (auto *For = dyn_cast<ForStmt>(Body)) {
        Body = For->getBody();
      } else {
        assert(isa<CXXForRangeStmt>(Body) &&
               "Expected canonical for loop or range-based for loop.");
        auto *CXXFor = cast<CXXForRangeStmt>(Body);
        if (const Stmt *Init = CXXFor->getInit())
          CGF.EmitStmt(Init);
        CGF.EmitStmt(CXXFor->getRangeStmt());
        CGF.EmitStmt(CXXFor->getEndStmt());
        Body = CXXFor->getBody();
      }
    }
    if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress(CGF);
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
                  : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
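        // E.g., a captured 'float' is stored into a uintptr_t-sized temporary
        // through a 'float *' view of it and reloaded here as a pointer-sized
        // integer, so the runtime can forward the value as if it were a
        // pointer.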
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress(CGF);
  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only cast arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
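  // The outlined function's parameter list is the captured declaration's
  // context parameters with one parameter per captured record field spliced
  // in at the context parameter position; captures by copy of non-pointer
  // types and VLA sizes are passed as pointer-sized integers when
  // UIntPtrCastRequired is set.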
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. VLA type sizes are passed to the outlined
    // function in the same way.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit a function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
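  // Walk the captures again and bind each incoming argument to the entity it
  // stands for: local addresses are remembered in LocalAddrs and VLA size
  // values in VLASizes, so the function body can be emitted against them.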
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit a function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress(CGF);
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress(CGF)}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
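  // When debug info is required, the body is emitted into a "<name>_debug__"
  // function that keeps the original parameter types, and a thin wrapper with
  // the uintptr-cast signature expected by the runtime is emitted under the
  // helper name to call it.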
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
        return LocalAddrPair.second.second;
      });
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
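  // The emitted copy has the shape of a guarded do-while loop over the
  // flattened element range:
  //   if (dest == dest_end) goto done;
  // body:
  //   CopyGen(dest_element, src_element);
  //   ++dest_element; ++src_element;
  //   if (dest_element != dest_end) goto body;
  // done: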
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with a single array element, so we have to remap the
            // destination and source variables to the corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
            Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function, e.g., omp for, omp simd, omp distribute, etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit a copy for firstprivate constant variables in target
      // regions that are captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this,
                                                                    OrigVD);
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
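          // E.g., 'firstprivate(X)' where 'X' is a constexpr variable: no
          // private copy is needed because every use of 'X' folds to the
          // constant itself.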
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
                const Expr *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform simple memcpy.
                  LValue Dest =
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
                } else {
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(),
                      OriginalLVal.getAddress(*this), Type,
                      [this, VDInit, Init](Address DestElement,
                                           Address SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for a single element.
                        setAddrOfLocalVar(VDInit, SrcElement);
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer*/ false);
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
                                               ThisFirstprivateIsLastprivate,
                                               OrigVD, &Lastprivates, IRef]() {
                // Emit the private VarDecl with copy init.
                // Remap the temp VDInit variable to the address of the
                // original variable (for proper handling of captured global
                // variables).
                setAddrOfLocalVar(VDInit, OriginalAddr);
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                if (ThisFirstprivateIsLastprivate &&
                    Lastprivates[OrigVD->getCanonicalDecl()] ==
                        OMPC_LASTPRIVATE_conditional) {
                  // Create/init a special variable for lastprivate
                  // conditionals.
                  Address VDAddr =
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                          *this, OrigVD);
                  llvm::Value *V = EmitLoadOfScalar(
                      MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
                                     AlignmentSource::Decl),
                      (*IRef)->getExprLoc());
                  EmitStoreOfScalar(V,
                                    MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                   AlignmentSource::Decl));
                  LocalDeclMap.erase(VD);
                  setAddrOfLocalVar(VD, VDAddr);
                  return VDAddr;
                }
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
          // Emit the private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // For each copyin variable, emit code of the form:
  //   threadprivate_var1 = master_threadprivate_var1;
  //   operator=(threadprivate_var2, master_threadprivate_var2);
  //   ...
  //   __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If it
          // is, there is no need to copy the data.
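          // On the master thread the threadprivate copy and the master copy
          // are the same object, so the address comparison below yields false
          // and the copy code is skipped.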
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of the copying procedure for the non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for the future update at
      // the end of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                          /*RefersToEnclosingVariableOrCapture=*/
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress(*this);
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C,
                                                              OrigVD]() {
            if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
              Address VDAddr =
                  CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                      *this, OrigVD);
              setAddrOfLocalVar(VD, VDAddr);
              return VDAddr;
            }
            // Emit the private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  //   if (<IsLastIterCond>) {
  //     orig_var1 = private_orig_var1;
  //     ...
  //     orig_varn = private_orig_varn;
  //   }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit an implicit barrier if at least one lastprivate conditional is
    // found and this is not simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
      Data.ReductionOps.append(C->reduction_ops().begin(),
                               C->reduction_ops().end());
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    }
  }
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit the private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
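      // The reduction combiner is expressed in terms of the implicit LHS and
      // RHS variables: LHS is bound to the original (shared) storage and RHS
      // to the private copy, so emitting the combiner expression performs
      // 'shared = shared <op> private'.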
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
        return IsArray ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
      });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
  if (!Data.ReductionVars.empty()) {
    Data.IsReductionWithTaskMod = true;
    Data.IsWorksharingReduction =
        isOpenMPWorksharingDirective(D.getDirectiveKind());
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
    const Expr *TaskRedRef = nullptr;
    switch (D.getDirectiveKind()) {
    case OMPD_parallel:
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_for:
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_sections:
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for:
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_master:
      TaskRedRef =
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_sections:
      TaskRedRef =
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel:
      TaskRedRef =
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel_for:
      TaskRedRef =
          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_distribute_parallel_for:
      TaskRedRef =
          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_target_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_simd:
    case OMPD_for_simd:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_parallel_for_simd:
    case OMPD_task:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_ordered:
    case OMPD_atomic:
    case OMPD_teams:
    case OMPD_target:
    case OMPD_cancellation_point:
    case OMPD_cancel:
    case OMPD_target_data:
    case OMPD_target_enter_data:
    case OMPD_target_exit_data:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_distribute:
    case OMPD_target_update:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_distribute_simd:
    case OMPD_target_parallel_for_simd:
    case OMPD_target_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams_distribute_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_declare_simd:
    case OMPD_requires:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_unknown:
      llvm_unreachable("Unexpected directive with task reductions.");
    }

    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
    EmitVarDecl(*VD);
    EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
                      /*Volatile=*/false, TaskRedRef->getType());
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  bool IsReductionWithTaskMod = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Do not emit for inscan reductions.
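    // Inscan reductions are finalized separately, as part of the codegen for
    // the 'scan' directive.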
    if (C->getModifier() == OMPC_REDUCTION_inscan)
      continue;
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    IsReductionWithTaskMod =
        IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
  }
  if (HasAtLeastOneReduction) {
    if (IsReductionWithTaskMod) {
      CGM.getOpenMPRuntime().emitTaskReductionFini(
          *this, D.getBeginLoc(),
          isOpenMPWorksharingDirective(D.getDirectiveKind()));
    }
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit a nowait reduction if the nowait clause is present or the directive
    // is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit a conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to the
/// outlined parallel function. This is necessary for combined constructs
/// such as 'distribute parallel for'.
This is necessary for combined constructs such as 1449 /// 'distribute parallel for' 1450 typedef llvm::function_ref<void(CodeGenFunction &, 1451 const OMPExecutableDirective &, 1452 llvm::SmallVectorImpl<llvm::Value *> &)> 1453 CodeGenBoundParametersTy; 1454 } // anonymous namespace 1455 1456 static void 1457 checkForLastprivateConditionalUpdate(CodeGenFunction &CGF, 1458 const OMPExecutableDirective &S) { 1459 if (CGF.getLangOpts().OpenMP < 50) 1460 return; 1461 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls; 1462 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 1463 for (const Expr *Ref : C->varlists()) { 1464 if (!Ref->getType()->isScalarType()) 1465 continue; 1466 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 1467 if (!DRE) 1468 continue; 1469 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); 1470 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); 1471 } 1472 } 1473 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 1474 for (const Expr *Ref : C->varlists()) { 1475 if (!Ref->getType()->isScalarType()) 1476 continue; 1477 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 1478 if (!DRE) 1479 continue; 1480 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); 1481 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); 1482 } 1483 } 1484 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 1485 for (const Expr *Ref : C->varlists()) { 1486 if (!Ref->getType()->isScalarType()) 1487 continue; 1488 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 1489 if (!DRE) 1490 continue; 1491 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); 1492 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); 1493 } 1494 } 1495 // Privates should ne analyzed since they are not captured at all. 1496 // Task reductions may be skipped - tasks are ignored. 1497 // Firstprivates do not return value but may be passed by reference - no need 1498 // to check for updated lastprivate conditional. 
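  // (Illustration, assuming OpenMP 5.0 'conditional' lastprivate semantics:
  // for
  //   #pragma omp parallel for lastprivate(conditional: a)
  //   for (int i = 0; i < n; ++i)
  //     if (p[i]) a = i;
  // assignments to 'a' are tracked so the value from the sequentially last
  // updating iteration survives; the decls collected above are already
  // private and are excluded from that shared-variable analysis.)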
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
    }
  }
  CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
      CGF, S, PrivateDecls);
}

static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    llvm::Value *NumThreads =
        CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                           /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond);
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
    // Check if we have any if clause associated with the directive.
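    // (Illustration: for a directive such as
    //   #pragma omp parallel if(cond) num_threads(4) proc_bind(close)
    // the clause values evaluated below are handed to the OMPIRBuilder,
    // which performs the outlining and runtime-call emission itself.)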
    llvm::Value *IfCond = nullptr;
    if (const auto *C = S.getSingleClause<OMPIfClause>())
      IfCond = EmitScalarExpr(C->getCondition(),
                              /*IgnoreResultAssign=*/true);

    llvm::Value *NumThreads = nullptr;
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                  /*IgnoreResultAssign=*/true);

    ProcBindKind ProcBind = OMP_PROC_BIND_default;
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
      ProcBind = ProcBindClause->getProcBindKind();

    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    // The cleanup callback that finalizes all variables at the given location,
    // thus calling destructors etc.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    // Privatization callback that performs the appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();

    auto BodyGenCB = [ParallelRegionBodyStmt,
                      this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                            llvm::BasicBlock &ContinuationBB) {
      OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP,
                                                      ContinuationBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt,
                                             CodeGenIP, ContinuationBB);
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB,
                                                 FiniCB, IfCond, NumThreads,
                                                 ProcBind, S.hasCancel()));
    return;
  }

  // Emit the parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // on propagation of the master thread's values of threadprivate
      // variables to local instances of those variables in all other implicit
      // threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
                     int MaxLevel, int Level = 0) {
  assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
  const Stmt *SimplifiedS = S->IgnoreContainers();
  if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
    PrettyStackTraceLoc CrashInfo(
        CGF.getContext().getSourceManager(), CS->getLBracLoc(),
        "LLVM IR generation of compound statement ('{}')");

    // Keep track of the current cleanup stack depth, including debug scopes.
    CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
    for (const Stmt *CurStmt : CS->body())
      emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
    return;
  }
  if (SimplifiedS == NextLoop) {
    if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
      S = For->getBody();
    } else {
      assert(isa<CXXForRangeStmt>(SimplifiedS) &&
             "Expected canonical for loop or range-based for loop.");
      const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
      CGF.EmitStmt(CXXFor->getLoopVarStmt());
      S = CXXFor->getBody();
    }
    if (Level + 1 < MaxLevel) {
      NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
          S, /*TryImperfectlyNestedLoops=*/true);
      emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
      return;
    }
  }
  CGF.EmitStmt(S);
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values on the current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(UE);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, so
  // there is no need to generate the code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(UE);
    }
  }

  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  for (const Expr *E : D.finals_conditions()) {
    if (!E)
      continue;
    // Check that the loop counter in a non-rectangular nest fits into the
    // iteration space.
    llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
    EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
                         getProfileCount(D.getBody()));
    EmitBlock(NextBB);
  }

  OMPPrivateScope InscanScope(*this);
  EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
  bool IsInscanRegion = InscanScope.Privatize();
  if (IsInscanRegion) {
    // Need to remember the blocks before and after the scan directive
    // to dispatch them correctly depending on the clause used in
    // this directive, inclusive or exclusive. For the inclusive clause the
    // natural order of the blocks is used; for the exclusive clause the
    // blocks must be executed in reverse order.
    OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
    OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
    OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
    OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
    EmitBranch(OMPScanDispatch);
    EmitBlock(OMPBeforeScanBlock);
  }

  // Emit loop variables for C++ range loops.
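  // (Illustration: for a range-based loop like 'for (int &v : vec)' inside a
  // collapsed nest, emitBody below emits the hidden loop-variable declaration
  // of each range-based for before descending into the next inner loop.)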
  const Stmt *Body =
      D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
  // Emit the loop body.
  emitBody(*this, Body,
           OMPLoopDirective::tryToFindNextInnerLoop(
               Body, /*TryImperfectlyNestedLoops=*/true),
           D.getCollapsedNumber());

  // Jump to the dispatcher at the end of the loop body.
  if (IsInscanRegion)
    EmitBranch(OMPScanExitBlock);

  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();

  // If attributes are attached, push the basic block with them to the loop
  // stack.
  const auto &OMPED = cast<OMPExecutableDirective>(S);
  const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
  const Stmt *SS = ICS->getCapturedStmt();
  const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
  if (AS)
    LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
                   AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
                   SourceLocToDebugLoc(R.getEnd()));
  else
    LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                   SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
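  // (Illustration: for '#pragma omp simd linear(x : s)' a private copy of 'x'
  // is initialized from the original variable; if the step 's' is not a
  // compile-time constant, a helper variable holding it is emitted below.)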
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Init : C->inits()) {
      HasLinears = true;
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (const auto *Ref =
              dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else {
        EmitVarDecl(*VD);
      }
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}

void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (const Expr *F : C->finals()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit the
          // conditional block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    llvm::APInt ClauseAlignment(64, 0);
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
      auto *AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = AlignmentCI->getValue();
    }
    for (const Expr *E : Clause->varlists()) {
      llvm::APInt Alignment(ClauseAlignment);
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || Alignment.isPowerOf2()) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.emitAlignmentAssumption(
            PtrValue, E, /*No second loc needed*/ SourceLocation(),
            llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
      }
    }
  }
}

void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (const Expr *E : S.counters()) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Emit var without initialization.
    AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
    EmitAutoVarCleanups(VarEmission);
    LocalDeclMap.erase(PrivateVD);
    (void)LoopScope.addPrivate(VD, [&VarEmission]() {
      return VarEmission.getAllocatedAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress(*this);
      });
    } else {
      (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
        return VarEmission.getAllocatedAddress();
      });
    }
    ++I;
  }
  // Privatize extra loop counters used in loops for ordered(n) clauses.
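  // (Illustration: for '#pragma omp for ordered(2)' with only one collapsed
  // loop, the counter of the second loop still participates in doacross
  // dependences, so it gets a private copy here as well.)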
  for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
    if (!C->getNumForLoops())
      continue;
    for (unsigned I = S.getCollapsedNumber(),
                  E = C->getLoopNumIterations().size();
         I < E; ++I) {
      const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
      const auto *VD = cast<VarDecl>(DRE->getDecl());
      // Override only those variables that can be captured to avoid
      // re-emission of the variables declared within the loops.
      if (DRE->refersToEnclosingVariableOrCapture()) {
        (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
          return CreateMemTemp(DRE->getType(), VD->getName());
        });
      }
    }
  }
}

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get the initial values of the real counters.
    for (const Expr *I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Create temp loop control variables with their init values to support
  // non-rectangular loops.
  CodeGenFunction::OMPMapVars PreCondVars;
  for (const Expr *E : S.dependent_counters()) {
    if (!E)
      continue;
    assert(!E->getType().getNonReferenceType()->isRecordType() &&
           "dependent counter must not be an iterator.");
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Address CounterAddr =
        CGF.CreateMemTemp(VD->getType().getNonReferenceType());
    (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
  }
  (void)PreCondVars.apply(CGF);
  for (const Expr *E : S.dependent_inits()) {
    if (!E)
      continue;
    CGF.EmitIgnoredExpr(E);
  }
  // Check that the loop is executed at least once.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
  PreCondVars.restore(CGF);
}

void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (const Expr *E : C->varlists()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else {
        EmitVarDecl(*PrivateVD);
      }
      ++CurPrivate;
    }
  }
}

static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(/*Enable=*/false);
  }
}

void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/simdlen.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable();
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
  if (const auto *C = D.getSingleClause<OMPOrderClause>())
    if (C->getKind() == OMPC_ORDER_concurrent)
      LoopStack.setParallel(/*Enable=*/true);
}

void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (const Expr *F : D.finals()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit the
          // conditional block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED) {
        OrigAddr =
            EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
      } else {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress(*this);
      }
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}

/// Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
                               const RegionCodeGenTy &SimdInitGen,
                               const RegionCodeGenTy &BodyCodeGen) {
  auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    SimdInitGen(CGF);

    BodyCodeGen(CGF);
  };
  auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);

    BodyCodeGen(CGF);
  };
  const Expr *IfCond = nullptr;
  if (isOpenMPSimdDirective(S.getDirectiveKind())) {
    for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
      if (CGF.getLangOpts().OpenMP >= 50 &&
          (C->getNameModifier() == OMPD_unknown ||
           C->getNameModifier() == OMPD_simd)) {
        IfCond = C->getCondition();
        break;
      }
    }
  }
  if (IfCond) {
    CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
         "Expected simd directive");
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }
  //
  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
      isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
      isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
  }

  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
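  // (Illustration: for a loop like 'for (int i = 0; i < 0; ++i)' the
  // precondition folds to false and the whole simd region is skipped; for a
  // runtime trip count the 'simd.if.then'/'simd.if.end' blocks below guard
  // the loop instead.)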
  bool CondConstant;
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
    if (!CondConstant)
      return;
  } else {
    llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
    ContBlock = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(ThenBlock);
    CGF.incrementProfileCounter(&S);
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }

  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
        CGF, S, CGF.EmitLValue(S.getIterationVariable()));
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    emitCommonSimdLoop(
        CGF, S,
        [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPSimdInit(S);
        },
        [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPInnerLoop(
              S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
              [&S](CodeGenFunction &CGF) {
                CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
                CGF.EmitStopPoint(&S);
              },
              [](CodeGenFunction &) {});
        });
    CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
    // Emit the final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(CGF, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for').
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    BoolCondVal =
        RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  emitCommonSimdLoop(
      *this, S,
      [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
        // Generate !llvm.loop.parallel metadata for loads and stores for loops
        // with dynamic/guided scheduling and without ordered clause.
        if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
          CGF.LoopStack.setParallel(!IsMonotonic);
          if (const auto *C = S.getSingleClause<OMPOrderClause>())
            if (C->getKind() == OMPC_ORDER_concurrent)
              CGF.LoopStack.setParallel(/*Enable=*/true);
        } else {
          CGF.EmitOMPSimdInit(S, IsMonotonic);
        }
      },
      [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
       &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
        SourceLocation Loc = S.getBeginLoc();
        // When 'distribute' is not combined with a 'for':
        //   while (idx <= UB) { BODY; ++idx; }
        // When 'distribute' is combined with a 'for'
        // (e.g. 'distribute parallel for'):
        //   while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        CGF.EmitOMPInnerLoop(
            S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
            [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
              CodeGenLoop(CGF, S, LoopExit);
            },
            [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
              CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
            });
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}

void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule);

  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the threads
  // in the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
        CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
                                                              LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DispatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);
  }

  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}

static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}

void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
  // dynamic.
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);

  // For combined 'distribute' and 'for' the increment expression of distribute
  // is stored in DistInc. For 'distribute' alone, it is in Inc.
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();

  // This routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive.
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}

static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into the current ones.
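  // (Illustration: in '#pragma omp distribute parallel for', each team's
  // 'distribute' chunk [PrevLB, PrevUB] becomes the [LB, UB] iteration space
  // that the inner 'for' then subdivides among the team's threads.)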
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevUpperBoundVariable()->getExprLoc());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}

/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
/// we need to use the LB and UB expressions generated by the worksharing
/// code generation support, whereas in non-combined situations we would
/// just emit 0 and the LastIteration expression.
/// This function is necessary due to the difference of the LB and UB
/// types for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // When implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
  // is not normalized as each team only executes its own assigned
  // distribute chunk.
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal =
      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  llvm::Value *UBVal =
      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  return {LBVal, UBVal};
}

static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  llvm::Value *LBCast =
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
                                CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));

  llvm::Value *UBCast =
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
                                CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}

static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &Action) {
    Action.Enter(CGF);
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S,
      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}

void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  // Emit SPMD target simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace

bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    bool Ordered = false;
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
      else
        Ordered = true;
    }

    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit inits for the helper variables.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit an implicit barrier to synchronize threads and avoid data
        // races on initialization of firstprivate variables and post-update
        // of lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
          *this, S, EmitLValue(S.getIterationVariable()));
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the loop schedule kind and chunk.
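      // (Illustration: 'schedule(static, 4)' yields OMPC_SCHEDULE_static with
      // a chunk expression of 4; with no schedule clause the default comes
      // from getDefaultScheduleAndChunk below.)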
2830 const Expr *ChunkExpr = nullptr; 2831 OpenMPScheduleTy ScheduleKind; 2832 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { 2833 ScheduleKind.Schedule = C->getScheduleKind(); 2834 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2835 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2836 ChunkExpr = C->getChunkSize(); 2837 } else { 2838 // Default behaviour for schedule clause. 2839 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk( 2840 *this, S, ScheduleKind.Schedule, ChunkExpr); 2841 } 2842 bool HasChunkSizeOne = false; 2843 llvm::Value *Chunk = nullptr; 2844 if (ChunkExpr) { 2845 Chunk = EmitScalarExpr(ChunkExpr); 2846 Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(), 2847 S.getIterationVariable()->getType(), 2848 S.getBeginLoc()); 2849 Expr::EvalResult Result; 2850 if (ChunkExpr->EvaluateAsInt(Result, getContext())) { 2851 llvm::APSInt EvaluatedChunk = Result.Val.getInt(); 2852 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1); 2853 } 2854 } 2855 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2856 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2857 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2858 // If the static schedule kind is specified or if the ordered clause is 2859 // specified, and if no monotonic modifier is specified, the effect will 2860 // be as if the monotonic modifier was specified. 2861 bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, 2862 /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && 2863 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); 2864 if ((RT.isStaticNonchunked(ScheduleKind.Schedule, 2865 /* Chunked */ Chunk != nullptr) || 2866 StaticChunkedOne) && 2867 !Ordered) { 2868 JumpDest LoopExit = 2869 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2870 emitCommonSimdLoop( 2871 *this, S, 2872 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2873 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2874 CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2875 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { 2876 if (C->getKind() == OMPC_ORDER_concurrent) 2877 CGF.LoopStack.setParallel(/*Enable=*/true); 2878 } 2879 }, 2880 [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk, 2881 &S, ScheduleKind, LoopExit, 2882 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 2883 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2884 // When no chunk_size is specified, the iteration space is divided 2885 // into chunks that are approximately equal in size, and at most 2886 // one chunk is distributed to each thread. Note that the size of 2887 // the chunks is unspecified in this case. 2888 CGOpenMPRuntime::StaticRTInput StaticInit( 2889 IVSize, IVSigned, Ordered, IL.getAddress(CGF), 2890 LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF), 2891 StaticChunkedOne ? 
      if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 /* Chunked */ Chunk != nullptr) ||
           StaticChunkedOne) &&
          !Ordered) {
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind())) {
                CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true);
              } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
                if (C->getKind() == OMPC_ORDER_concurrent)
                  CGF.LoopStack.setParallel(/*Enable=*/true);
              }
            },
            [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
             &S, ScheduleKind, LoopExit,
             &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
              // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
              // When no chunk_size is specified, the iteration space is
              // divided into chunks that are approximately equal in size,
              // and at most one chunk is distributed to each thread. Note
              // that the size of the chunks is unspecified in this case.
              CGOpenMPRuntime::StaticRTInput StaticInit(
                  IVSize, IVSigned, Ordered, IL.getAddress(CGF),
                  LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
                  StaticChunkedOne ? Chunk : nullptr);
              CGF.CGM.getOpenMPRuntime().emitForStaticInit(
                  CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
                  StaticInit);
              // UB = min(UB, GlobalUB);
              if (!StaticChunkedOne)
                CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
              // IV = LB;
              CGF.EmitIgnoredExpr(S.getInit());
              // For unchunked static schedule generate:
              //
              // while (idx <= UB) {
              //   BODY;
              //   ++idx;
              // }
              //
              // For static schedule with chunk one:
              //
              // while (IV <= PrevUB) {
              //   BODY;
              //   IV += ST;
              // }
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(),
                  StaticChunkedOne ? S.getCombinedParForInDistCond()
                                   : S.getCond(),
                  StaticChunkedOne ? S.getDistInc() : S.getInc(),
                  [&S, LoopExit](CodeGenFunction &CGF) {
                    CGF.EmitOMPLoopBody(S, LoopExit);
                    CGF.EmitStopPoint(&S);
                  },
                  [](CodeGenFunction &) {});
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                         S.getDirectiveKind());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        const bool IsMonotonic =
            Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
            ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
            ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
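        // For illustration, 'schedule(monotonic : dynamic)' sets IsMonotonic
        // through the M1 modifier, and a chunked 'schedule(static, c)'
        // reaches this branch with a static schedule kind, which is treated
        // as monotonic per the OpenMP 4.5 rule quoted above.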
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // the runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments(
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            IL.getAddress(*this), Chunk, EUB);
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [IL, &S](CodeGenFunction &CGF) {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
    }
    EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, /*IsFinished=*/true);
    }
  }
  return HasLastprivateClause;
}

/// The following two functions generate expressions for the loop lower
/// and upper bounds in case of static and dynamic (dispatch) schedule
/// of the associated 'for' or 'distribute' loop.
static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  const auto &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  return {LB, UB};
}

/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
/// consider the lower and upper bound expressions generated by the
/// worksharing loop support, but we use 0 and the iteration space size as
/// constants.
static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                          Address LB, Address UB) {
  const auto &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
  return {LBVal, UBVal};
}

/// Emits the code for the directive with inscan reductions.
/// The code is the following:
/// \code
/// size num_iters = <num_iters>;
/// <type> buffer[num_iters];
/// #pragma omp ...
/// for (i: 0..<num_iters>) {
///   <input phase>;
///   buffer[i] = red;
/// }
/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
///   for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
///     buffer[cnt] op= buffer[cnt-pow(2,k)];
/// #pragma omp ...
/// for (i: 0..<num_iters>) {
///   red = InclusiveScan ? buffer[i] : buffer[i-1];
///   <scan phase>;
/// }
/// \endcode
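///
/// For illustration (not part of the emitted code), with num_iters = 4 and a
/// '+' reduction the prefix loop performs:
/// \code
/// k = 0 (pow2k = 1): buffer[3] += buffer[2]; buffer[2] += buffer[1];
///                    buffer[1] += buffer[0];
/// k = 1 (pow2k = 2): buffer[3] += buffer[1]; buffer[2] += buffer[0];
/// \endcode
/// leaving buffer[i] equal to the inclusive prefix reduction of iterations
/// 0..i.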
static void emitScanBasedDirective(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
    llvm::function_ref<void(CodeGenFunction &)> FirstGen,
    llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  {
    // Emit buffers for each reduction variable.
    // ReductionCodeGen is required to emit the code for array reductions
    // correctly.
    ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
    unsigned Count = 0;
    auto *ITA = CopyArrayTemps.begin();
    for (const Expr *IRef : Privates) {
      const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      // Emit variably modified arrays, used for arrays/array sections
      // reductions.
      if (PrivateVD->getType()->isVariablyModifiedType()) {
        RedCG.emitSharedOrigLValue(CGF, Count);
        RedCG.emitAggregateType(CGF, Count);
      }
      CodeGenFunction::OpaqueValueMapping DimMapping(
          CGF,
          cast<OpaqueValueExpr>(
              cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
                  ->getSizeExpr()),
          RValue::get(OMPScanNumIterations));
      // Emit temp buffer.
      CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
      ++ITA;
      ++Count;
    }
  }
  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
  {
    // Emit loop with input phase:
    // #pragma omp ...
    // for (i: 0..<num_iters>) {
    //   <input phase>;
    //   buffer[i] = red;
    // }
    CGF.OMPFirstScanLoop = true;
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    FirstGen(CGF);
  }
  // Emit prefix reduction:
  // for (int k = 0; k != ceil(log2(n)); ++k)
  llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
  llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
  llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
  llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
  llvm::Value *Arg =
      CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
  llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
  F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
  LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
  LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
  llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
      OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
  CGF.EmitBlock(LoopBB);
  auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
  // size pow2k = 1;
  auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
  Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
  Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
  // for (size i = n - 1; i >= 2 ^ k; --i)
  //   tmp[i] op= tmp[i-pow2k];
  llvm::BasicBlock *InnerLoopBB =
      CGF.createBasicBlock("omp.inner.log.scan.body");
  llvm::BasicBlock *InnerExitBB =
      CGF.createBasicBlock("omp.inner.log.scan.exit");
  llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
  CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
  CGF.EmitBlock(InnerLoopBB);
  auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
  IVal->addIncoming(NMin1, LoopBB);
  {
    CodeGenFunction::OMPPrivateScope PrivScope(CGF);
    auto *ILHS = LHSs.begin();
    auto *IRHS = RHSs.begin();
    for (const Expr *CopyArrayElem : CopyArrayElems) {
      const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      Address LHSAddr = Address::invalid();
      {
        CodeGenFunction::OpaqueValueMapping IdxMapping(
            CGF,
            cast<OpaqueValueExpr>(
                cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
            RValue::get(IVal));
        LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
      }
      PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
      Address RHSAddr = Address::invalid();
      {
        llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
        CodeGenFunction::OpaqueValueMapping IdxMapping(
            CGF,
            cast<OpaqueValueExpr>(
                cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
            RValue::get(OffsetIVal));
        RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
      }
      PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
      ++ILHS;
      ++IRHS;
    }
    PrivScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitReduction(
        CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
        {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
  }
  llvm::Value *NextIVal =
      CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
  IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
  CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
  CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
  CGF.EmitBlock(InnerExitBB);
  llvm::Value *Next =
      CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
  Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
  // pow2k <<= 1;
  llvm::Value *NextPow2K = CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
  Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
  llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
  CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
  auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
  CGF.EmitBlock(ExitBB);

  CGF.OMPFirstScanLoop = false;
  SecondGen(CGF);
}
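
// For reference, an illustrative source pattern (assumed, not from this file)
// that is lowered through the scan-based codegen above when the caller below
// detects an inscan reduction:
// \code
// #pragma omp for reduction(inscan, +: sum)
// for (int i = 0; i < n; ++i) {
//   sum += a[i];            // <input phase>
//   #pragma omp scan inclusive(sum)
//   b[i] = sum;             // <scan phase>
// }
// \endcode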
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                     [](const OMPReductionClause *C) {
                       return C->getModifier() == OMPC_REDUCTION_inscan;
                     })) {
      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
        OMPLocalDeclMapRAII Scope(CGF);
        OMPLoopScope LoopScope(CGF, S);
        return CGF.EmitScalarExpr(S.getNumIterations());
      };
      const auto &&FirstGen = [&S](CodeGenFunction &CGF) {
        OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
        (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                         emitForLoopBounds,
                                         emitDispatchForLoopBounds);
        // Emit an implicit barrier at the end.
        CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
                                                   OMPD_for);
      };
      const auto &&SecondGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
        OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
        HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                     emitForLoopBounds,
                                                     emitDispatchForLoopBounds);
      };
      emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
    } else {
      OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
      HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                   emitForLoopBounds,
                                                   emitDispatchForLoopBounds);
    }
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
  return LVal;
}
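
// For illustration (assumed source, not from this file):
//
//   #pragma omp sections
//   {
//     #pragma omp section
//     foo();
//     #pragma omp section
//     bar();
//   }
//
// EmitSections lowers this as a static worksharing loop over the section
// index (LB = 0, UB = <number of sections> - 1) whose body switches on the
// loop counter to execute foo() or bar().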
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, CapturedStmt, CS,
                    &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
    const ASTContext &C = CGF.getContext();
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
                                         ? CGF.Builder.getInt32(CS->size() - 1)
                                         : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator *Cond = BinaryOperator::Create(
        C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary,
        S.getBeginLoc(), FPOptions(C.getLangOpts()));
    // Increment for loop counter.
    UnaryOperator *Inc = UnaryOperator::Create(
        C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
        S.getBeginLoc(), true, FPOptions(C.getLangOpts()));
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      //   ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      llvm::SwitchInst *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (const Stmt *SubStmt : CS->children()) {
          auto *CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(CapturedStmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of
      // lastprivate variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
        LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if the 'sections' directive has a
  // 'nowait' clause. Otherwise the barrier will be generated by the codegen
  // for the directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_unknown);
  }
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_sections);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
                                              S.hasCancel());
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination> = <source> expressions).
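  // For example (illustrative):
  //
  //   #pragma omp single copyprivate(a)
  //   a = compute();
  //
  // broadcasts the value of 'a' from the thread that executed the region to
  // the corresponding variable of every other thread in the team.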
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid a data race on firstprivate
  // init, or if no 'nowait' clause was specified and no 'copyprivate' clause).
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getBeginLoc(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    const Stmt *MasterRegionBodyStmt = CS->getCapturedStmt();

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP,
                                                  llvm::BasicBlock &FiniBB) {
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt,
                                             CodeGenIP, FiniBB);
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    Builder.restoreIP(OMPBuilder->CreateMaster(Builder, BodyGenCB, FiniCB));

    return;
  }
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  emitMaster(*this, S);
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    const Stmt *CriticalRegionBodyStmt = CS->getCapturedStmt();
    const Expr *Hint = nullptr;
    if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
      Hint = HintClause->getHint();
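
    // For example (illustrative):
    //
    //   #pragma omp critical (updatelock) hint(omp_sync_hint_contended)
    //   ++counter;
    //
    // names the critical section 'updatelock'; the hint expression is
    // evaluated below and forwarded to the runtime.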
    // TODO: This is slightly different from what's currently being done in
    // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
    // about typing is final.
    llvm::Value *HintInst = nullptr;
    if (Hint)
      HintInst =
          Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                    InsertPointTy CodeGenIP,
                                                    llvm::BasicBlock &FiniBB) {
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt,
                                             CodeGenIP, FiniBB);
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    Builder.restoreIP(OMPBuilder->CreateCritical(
        Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
        HintInst));

    return;
  }

  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  const Expr *Hint = nullptr;
  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getBeginLoc(), Hint);
}

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for simd' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelMasterDirective(
    const OMPParallelMasterDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'master' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit implicit barrier to synchronize threads and avoid data races
      // when propagating the master thread's values of threadprivate
      // variables to the local instances of those variables in all other
      // implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    emitMaster(CGF, S);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitSections(S);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPTaskBasedDirective(
    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
    OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  // Check if the task is final.
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    const Expr *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
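  // For example (illustrative), in
  //
  //   #pragma omp task final(depth >= cutoff)
  //   work(depth);
  //
  // the final expression is evaluated in the encountering task; when it
  // constant folds, the branch above elides the runtime check entirely.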
  // Check if the task has a 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    const Expr *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >= 0 for untied tasks).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get the list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get the list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get the list of lastprivate variables (for taskloops).
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
    Data.ReductionOps.append(C->reduction_ops().begin(),
                             C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getBeginLoc(), LHSs, RHSs, Data);
  // Build the list of dependences.
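  // For example (illustrative), 'depend(in : x) depend(out : y)' on the
  // directive produces two DependData entries below, one per clause, whose
  // DepExprs hold the respective variable lists.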
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    OMPTaskDataTy::DependData &DD =
        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
  }
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
                    CapturedRegion](CodeGenFunction &CGF,
                                    PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
          CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (const Expr *E : Data.PrivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        FirstprivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (const Expr *E : Data.LastprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : LastprivateDstsOrigs) {
        const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        Pair.second->getType(), VK_LValue,
                        Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress(CGF);
        });
      }
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    if (Data.Reductions) {
      OMPPrivateScope FirstprivateScope(CGF);
      for (const auto &Pair : FirstprivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        FirstprivateScope.addPrivate(Pair.first,
                                     [Replacement]() { return Replacement; });
      }
      (void)FirstprivateScope.Privatize();
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
                             Data.ReductionCopies, Data.ReductionOps);
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement =
            Address(CGF.EmitScalarConversion(
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                        CGF.getContext().getPointerType(
                            Data.ReductionCopies[Cnt]->getType()),
                        Data.ReductionCopies[Cnt]->getExprLoc()),
                    Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                         [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const Expr *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
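    // For example (illustrative):
    //
    //   #pragma omp taskgroup task_reduction(+ : s)
    //   {
    //     #pragma omp task in_reduction(+ : s)
    //     s += f();
    //   }
    //
    // the task looks up its private copy of 's' through the enclosing
    // taskgroup's reduction descriptor, which is why the descriptor must
    // already be privatized at this point.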
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate
        // and privatized already during processing of the firstprivates.
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        llvm::Value *ReductionsPtr;
        if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
          ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
                                               TRExpr->getExprLoc());
        } else {
          ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
        }
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                InRedPrivs[Cnt]->getExprLoc()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
                              [Replacement]() { return Replacement; });
      }
    }
    (void)InRedScope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S, llvm::None,
                        !isOpenMPParallelDirective(S.getDirectiveKind()) &&
                            !isOpenMPSimdDirective(S.getDirectiveKind()));
  TaskGen(*this, OutlinedFn, Data);
}

static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                           ImplicitParamDecl::Other);
  auto *OrigRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                              ImplicitParamDecl::Other);
  auto *PrivateRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  QualType ElemType = C.getBaseElementType(Ty);
  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
                                           ImplicitParamDecl::Other);
  auto *InitRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
  PrivateVD->setInitStyle(VarDecl::CInit);
  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
                                              InitRef, /*BasePath=*/nullptr,
                                              VK_RValue));
  Data.FirstprivateVars.emplace_back(OrigRef);
  Data.FirstprivateCopies.emplace_back(PrivateRef);
  Data.FirstprivateInits.emplace_back(InitRef);
  return OrigVD;
}

void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
    OMPTargetDataInfo &InputInfo) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  OMPTaskDataTy Data;
  // The task is not final.
  Data.Final.setInt(/*IntVal=*/false);
  // Get the list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      Data.FirstprivateVars.push_back(*IRef);
      Data.FirstprivateCopies.push_back(IInit);
      Data.FirstprivateInits.push_back(*IElemInitRef);
      ++IRef;
      ++IElemInitRef;
    }
  }
  OMPPrivateScope TargetScope(*this);
  VarDecl *BPVD = nullptr;
  VarDecl *PVD = nullptr;
  VarDecl *SVD = nullptr;
  if (InputInfo.NumberOfTargetItems > 0) {
    auto *CD = CapturedDecl::Create(
        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
    QualType BaseAndPointersType = getContext().getConstantArrayType(
        getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    BPVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
    PVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
    QualType SizesType = getContext().getConstantArrayType(
        getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
        ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
                                            S.getBeginLoc());
    TargetScope.addPrivate(
        BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
    TargetScope.addPrivate(PVD,
                           [&InputInfo]() { return InputInfo.PointersArray; });
    TargetScope.addPrivate(SVD,
                           [&InputInfo]() { return InputInfo.SizesArray; });
  }
  (void)TargetScope.Privatize();
  // Build the list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    OMPTaskDataTy::DependData &DD =
        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
  }
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.FirstprivateVars.empty()) {
      llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
          CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    if (InputInfo.NumberOfTargetItems > 0) {
      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
    }

    Action.Enter(CGF);
    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
      Data.NumberOfParts);
  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
  IntegerLiteral IfCond(getContext(), TrueOrFalse,
                        getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
                        SourceLocation());

  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
                                      SharedsTy, CapturedStruct, &IfCond, Data);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }
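  // For example (illustrative), 'if(task : n > 1000)' matches here because of
  // its 'task' name modifier; an 'if' clause naming a different directive
  // would be skipped by this lookup.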

  OMPTaskDataTy Data;
  // Check if we should emit a tied or untied task.
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
        Data.ReductionOps.append(C->reduction_ops().begin(),
                                 C->reduction_ops().end());
        LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
        RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
                                                           LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
                                ? llvm::AtomicOrdering::NotAtomic
                                : llvm::AtomicOrdering::AcquireRelease;
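  // For example (illustrative), a bare '#pragma omp flush' requests
  // acquire/release ordering from the runtime, while '#pragma omp flush(x)'
  // carries a variable list and is emitted with NotAtomic ordering.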
  CGM.getOpenMPRuntime().emitFlush(
      *this,
      [&S]() -> ArrayRef<const Expr *> {
        if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
          return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                    FlushClause->varlist_end());
        return llvm::None;
      }(),
      S.getBeginLoc(), AO);
}

void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
  const auto *DO = S.getSingleClause<OMPDepobjClause>();
  LValue DOLVal = EmitLValue(DO->getDepobj());
  if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
    OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
                                           DC->getModifier());
    Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
    Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
        *this, Dependencies, DC->getBeginLoc());
    EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
    return;
  }
  if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
    CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
    return;
  }
  if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
    CGM.getOpenMPRuntime().emitUpdateClause(
        *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
    return;
  }
}
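
// For example (illustrative), EmitOMPDepobjDirective above handles:
//
//   omp_depend_t obj;
//   #pragma omp depobj(obj) depend(inout : x)   // allocate and fill
//   #pragma omp depobj(obj) update(in)          // change dependence kind
//   #pragma omp depobj(obj) destroy             // release the object
//
// with exactly one of the depend, update, or destroy branches taken per
// directive.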

void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
  // Do not emit code for non-simd directives in simd-only mode.
  if (getLangOpts().OpenMPSimd && !OMPParentLoopDirectiveForScan)
    return;
  const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
    if (C->getModifier() != OMPC_REDUCTION_inscan)
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
  if (!IsInclusive) {
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(OMPScanExitBlock);
  }
  if (OMPFirstScanLoop) {
    // Emit buffer[i] = red; at the end of the input phase.
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue DestLVal = EmitLValue(CopyArrayElem);
      LValue SrcLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                  SrcLVal.getAddress(*this),
                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                  CopyOps[I]);
    }
  }
  EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  if (IsInclusive) {
    EmitBlock(OMPScanExitBlock);
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  }
  EmitBlock(OMPScanDispatch);
  if (!OMPFirstScanLoop) {
    // Emit red = buffer[i]; at the entrance to the scan phase.
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    llvm::BasicBlock *ExclusiveExitBB = nullptr;
    if (!IsInclusive) {
      llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
      ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
      llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
      Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
      EmitBlock(ContBB);
      // Use idx - 1 iteration for exclusive scan.
      IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
    }
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue SrcLVal = EmitLValue(CopyArrayElem);
      LValue DestLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                  SrcLVal.getAddress(*this),
                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                  CopyOps[I]);
    }
    if (!IsInclusive) {
      EmitBlock(ExclusiveExitBB);
    }
  }
  EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
                                               : OMPAfterScanBlock);
  EmitBlock(OMPAfterScanBlock);
}
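// Illustrative only: a sketch of the inscan reduction the two-pass codegen
// above implements:
//
//   #pragma omp for reduction(inscan, + : s)
//   for (int i = 0; i < n; ++i) {
//     s += a[i];                   // input phase: pass one stores buffer[i] = s
//     #pragma omp scan inclusive(s)
//     b[i] = s;                    // scan phase: pass two loads s = buffer[i]
//   }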

void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    // Emit 'then' code.
    {
      // Emit helper vars inits.

      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind()))
        EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const Expr *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getBeginLoc());
        }
      } else {
        // Default behaviour for dist_schedule clause.
        CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
            *this, S, ScheduleKind, Chunk);
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
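      // Illustrative only:
      //   #pragma omp distribute dist_schedule(static, 512)
      // deals chunks of 512 iterations to the teams in a round-robin fashion,
      // while plain dist_schedule(static) gives each team at most one
      // approximately equal chunk.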
      bool StaticChunked =
          RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr) ||
          StaticChunked) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            StaticChunked ? Chunk : nullptr);
        RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                    StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        const Expr *Cond =
            isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                ? S.getCombinedCond()
                : S.getCond();

        if (StaticChunked)
          Cond = S.getCombinedDistCond();

        // For static unchunked schedules generate:
        //
        //  1. For distribute alone, codegen
        //    while (idx <= UB) {
        //      BODY;
        //      ++idx;
        //    }
        //
        //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
        //    while (idx <= UB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      idx += ST;
        //    }
        //
        // For static chunked schedules generate:
        //
        // while (IV <= GlobalUB) {
        //   <CodeGen rest of pragma>(LB, UB);
        //   LB += ST;
        //   UB += ST;
        //   UB = min(UB, GlobalUB);
        //   IV = LB;
        // }
        //
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind()))
                CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true);
            },
            [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
             StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(), Cond, IncExpr,
                  [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                    CodeGenLoop(CGF, S, LoopExit);
                  },
                  [&S, StaticChunked](CodeGenFunction &CGF) {
                    if (StaticChunked) {
                      CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
                      CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedInit());
                    }
                  });
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // the runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            IL.getAddress(*this), Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind())) {
        EmitOMPReductionClauseFinal(S, OMPD_simd);
        // Emit post-update of the reduction variables if IsLastIter != 0.
        emitPostUpdateForReductionClause(
            *this, S, [IL, &S](CodeGenFunction &CGF) {
              return CGF.Builder.CreateIsNotNull(
                  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
            });
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}

void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S,
                                                   SourceLocation Loc) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
  Fn->setDoesNotRecurse();
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.getAssociatedStmt() &&
           "No associated statement must be in ordered depend construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      llvm::Function *OutlinedFn =
          emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
}
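// Illustrative only: a sketch of the doacross form handled by the depend
// branch above:
//
//   #pragma omp for ordered(1)
//   for (int i = 1; i < n; ++i) {
//     #pragma omp ordered depend(sink : i - 1)
//     a[i] += a[i - 1];
//     #pragma omp ordered depend(source)
//   }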

static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                                   DestType, Loc)
                        : CGF.EmitComplexToScalarConversion(
                              Val.getComplexVal(), SrcType, DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    llvm::Value *ScalarVal = CGF.EmitScalarConversion(
        Val.getScalarVal(), SrcType, DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg())
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  else
    CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
}

static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, LValue LVal,
                                   SourceLocation Loc) {
  if (LVal.isGlobalReg())
    return CGF.EmitLoadOfLValue(LVal, Loc);
  return CGF.EmitAtomicLoad(
      LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
      LVal.isVolatile());
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
  // OpenMP, 2.17.7, atomic Construct
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::Monotonic:
  case llvm::AtomicOrdering::Release:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
}

static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, const Expr *X,
                                   const Expr *E, SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}
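// Illustrative only: the source forms handled by the two helpers above:
//
//   #pragma omp atomic read acquire
//   v = x;          // emitOMPAtomicReadExpr
//
//   #pragma omp atomic write release
//   x = expr;       // emitOMPAtomicWriteExpr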

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomics are supported for the given
  // type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress(CGF).getElementType())) ||
      !X.getAddress(CGF).getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress(CGF).getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  llvm::Value *Res =
      CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
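// Illustrative only: when the fast path above applies, e.g. for
//
//   int x;
//   #pragma omp atomic
//   x += 1;
//
// the whole update collapses into a single instruction, roughly:
//
//   %old = atomicrmw add i32* %x, i32 1 monotonic
//
// instead of an atomic load / compare-exchange loop.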

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
                                    llvm::AtomicOrdering AO, const Expr *X,
                                    const Expr *E, const Expr *UE,
                                    bool IsXLHSInRHSPart, SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}
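// Illustrative only: two updates that differ only in IsXLHSInRHSPart:
//
//   #pragma omp atomic update
//   x = x - y;   // IsXLHSInRHSPart = true:  update is 'xrval - y'
//
//   #pragma omp atomic update
//   x = y - x;   // IsXLHSInRHSPart = false: update is 'y - xrval'
//
// The distinction matters for non-commutative operators; emitOMPAtomicRMW
// refuses the atomicrmw sub fast path when 'x' is on the right-hand side.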

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
                                     llvm::AtomicOrdering AO,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval binop expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr;
    // x = expr Op x; -> expr binop xrval;
    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide the new value, so evaluate it using the
        // old value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::AcquireRelease);
    break;
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}
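// Illustrative only: capture forms and which value lands in 'v':
//
//   #pragma omp atomic capture
//   v = x++;               // postfix: v receives the old value of x
//
//   #pragma omp atomic capture
//   v = ++x;               // prefix: v receives the new value of x
//
//   #pragma omp atomic capture
//   { v = x; x = expr; }   // write-capture: v receives the old value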

static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              llvm::AtomicOrdering AO, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
    break;
  case OMPC_write:
    emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_allocator:
  case OMPC_allocate:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_acq_rel:
  case OMPC_acquire:
  case OMPC_release:
  case OMPC_relaxed:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_depobj:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_use_device_addr:
  case OMPC_is_device_ptr:
  case OMPC_unified_address:
  case OMPC_unified_shared_memory:
  case OMPC_reverse_offload:
  case OMPC_dynamic_allocators:
  case OMPC_atomic_default_mem_order:
  case OMPC_device_type:
  case OMPC_match:
  case OMPC_nontemporal:
  case OMPC_order:
  case OMPC_destroy:
  case OMPC_detach:
  case OMPC_inclusive:
  case OMPC_exclusive:
  case OMPC_uses_allocators:
  case OMPC_affinity:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
  bool MemOrderingSpecified = false;
  if (S.getSingleClause<OMPSeqCstClause>()) {
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcqRelClause>()) {
    AO = llvm::AtomicOrdering::AcquireRelease;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcquireClause>()) {
    AO = llvm::AtomicOrdering::Acquire;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPReleaseClause>()) {
    AO = llvm::AtomicOrdering::Release;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPRelaxedClause>()) {
    AO = llvm::AtomicOrdering::Monotonic;
    MemOrderingSpecified = true;
  }
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find the first clause (skip the seq_cst|acq_rel|acquire|release|relaxed
    // clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst &&
        C->getClauseKind() != OMPC_acq_rel &&
        C->getClauseKind() != OMPC_acquire &&
        C->getClauseKind() != OMPC_release &&
        C->getClauseKind() != OMPC_relaxed) {
      Kind = C->getClauseKind();
      break;
    }
  }
  if (!MemOrderingSpecified) {
    llvm::AtomicOrdering DefaultOrder =
        CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
    if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
        DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
        (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
         Kind == OMPC_capture)) {
      AO = DefaultOrder;
    } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
      if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
        AO = llvm::AtomicOrdering::Release;
      } else if (Kind == OMPC_read) {
        assert(Kind == OMPC_read && "Unexpected atomic kind.");
        AO = llvm::AtomicOrdering::Acquire;
      }
    }
  }

  const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
  if (const auto *FE = dyn_cast<FullExpr>(CS))
    enterFullExpression(FE);
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const Stmt *C : Compound->body()) {
      if (const auto *FE = dyn_cast<FullExpr>(C))
        enterFullExpression(FE);
    }
  }

  auto &&CodeGen = [&S, Kind, AO, CS](CodeGenFunction &CGF,
                                      PrePostActionTy &) {
    CGF.EmitStopPoint(CS);
    emitOMPAtomicExpr(CGF, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
                      S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(),
                      S.getBeginLoc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}
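// Illustrative only: with no explicit memory-order clause, the defaulting
// logic above means that under
//
//   #pragma omp requires atomic_default_mem_order(acq_rel)
//
// a bare '#pragma omp atomic read' is emitted with acquire ordering, a bare
// update or write with release ordering, and a capture with acq_rel.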

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // On device emit this construct as inlined code.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
        });
    return;
  }

  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the at most one if clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
      nullptr, OMPC_DEVICE_unknown);
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device.setPointerAndInt(C->getDevice(), C->getModifier());

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  auto &&SizeEmitter =
      [IsOffloadEntry](CodeGenFunction &CGF,
                       const OMPLoopDirective &D) -> llvm::Value * {
    if (IsOffloadEntry) {
      OMPLoopScope(CGF, D);
      // Emit calculation of the iterations count.
      llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
      NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
                                                /*isSigned=*/false);
      return NumIterations;
    }
    return nullptr;
  };
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        SizeEmitter);
}
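// Illustrative only: the pieces gathered above correspond to a directive like
//
//   #pragma omp target if(n > 1024) device(1) map(tofrom : a[0:n])
//   { ... }
//
// The if clause selects between offloading and host fallback, the device
// clause picks the target device, and without any offload targets
// (-fopenmp-targets=...) the region is not registered as an offload entry.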

static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
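// Illustrative only: the num_teams/thread_limit handling above corresponds to
//
//   #pragma omp teams num_teams(8) thread_limit(64)
//   { ... }
//
// which caps the league at 8 teams and each team at 64 threads.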

static void emitTargetTeamsRegion(CodeGenFunction &CGF,
                                  PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {

  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
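// Illustrative only: for a combined construct such as
//
//   #pragma omp teams distribute parallel for
//   for (int i = 0; i < n; ++i)
//     a[i] = b[i] + c[i];
//
// the distribute loop deals chunks to the teams, and the nested 'parallel for'
// (emitInnerParallelForWhenCombined) splits each chunk across the threads of a
// team, using the combined bounds prepared by EmitOMPDistributeLoop.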

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
                              CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  // Emit SPMD target teams distribute parallel for region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  // Emit SPMD target teams distribute parallel for simd region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
    // TODO: This check is necessary as we only generate `omp parallel` through
    // the OpenMPIRBuilder for now.
    if (S.getCancelRegion() == OMPD_parallel) {
      llvm::Value *IfCondition = nullptr;
      if (IfCond)
        IfCondition = EmitScalarExpr(IfCond,
                                     /*IgnoreResultAssign=*/true);
      return Builder.restoreIP(
          OMPBuilder->CreateCancel(Builder, IfCondition, S.getCancelRegion()));
    }
  }

  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
                                        S.getCancelRegion());
}
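// Illustrative only: the cancellation pair handled above ('error_found' and
// 'allow_cancel' are placeholder conditions):
//
//   #pragma omp parallel
//   {
//     if (error_found) {
//       #pragma omp cancel parallel if(allow_cancel)
//     }
//     #pragma omp cancellation point parallel
//     ...
//   }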
    if (S.getCancelRegion() == OMPD_parallel) {
      llvm::Value *IfCondition = nullptr;
      if (IfCond)
        IfCondition = EmitScalarExpr(IfCond,
                                     /*IgnoreResultAssign=*/true);
      return Builder.restoreIP(
          OMPBuilder->CreateCancel(Builder, IfCondition, S.getCancelRegion()));
    }
  }

  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
      Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}

void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (const Expr *PvtVarIt : C.private_copies()) {
    const auto *OrigVD =
        cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privatize fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD,
                                                         InitAddrIt, InitVD,
                                                         PvtVD]() {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // that the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it will be initialized by the
      // declaration we just added to the local declarations map.
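      // Illustrative sketch only: given
      //   int *p;
      //   #pragma omp target data map(to: p[0:N]) use_device_ptr(p)
      //     use(p);
      // the runtime supplies the device address of the mapped storage in
      // CaptureDeviceAddrMap; InitVD temporarily aliases that address so the
      // private copy PvtVD emitted next is initialized with the device
      // pointer that 'use(p)' will observe.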
      EmitDecl(*PvtVD);

      // The initialization variable has served its purpose in the emission
      // of the previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}

// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code generation
  // to deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers
      // if needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inline scope: changes to the references inside the region
    // are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    // TODO: Add support for clauses.
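    // Illustrative sketch only: for
    //   #pragma omp target parallel firstprivate(a) reduction(+: s)
    //   { ... }
    // the clause emission above has already set up the private copies, so
    // emitting the captured statement below produces the parallel body with
    // those privates in scope.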
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit the directive as a combined directive that consists of two implicit
  // directives: 'parallel' and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit the directive as a combined directive that consists of two implicit
  // directives: 'parallel' and 'for'.
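  // Illustrative sketch only: conceptually,
  //   #pragma omp target parallel for simd
  //   for (int i = 0; i < N; ++i) ...
  // is emitted as if it were
  //   #pragma omp parallel
  //   #pragma omp for simd
  //   for (int i = 0; i < N; ++i) ...
  // with the worksharing-loop emission below handling the 'for simd' part.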
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

/// Map a loop helper variable to the corresponding implicit parameter of the
/// captured statement in the given private scope.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  const auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(VDecl,
                      [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
}

void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  Address CapturedStruct = Address::invalid();
  {
    OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
    CapturedStruct = GenerateCapturedStmtArgument(*CS);
  }
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
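    // (Illustrative: for a canonical loop "for (int i = 0; i < n; ++i)",
    // PreCond is the expression "0 < n".)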
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    (void)CGF.EmitOMPLinearClauseInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    {
      OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
      emitCommonSimdLoop(
          CGF, S,
          [&S](CodeGenFunction &CGF, PrePostActionTy &) {
            if (isOpenMPSimdDirective(S.getDirectiveKind()))
              CGF.EmitOMPSimdInit(S);
          },
          [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
            CGF.EmitOMPInnerLoop(
                S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                [&S](CodeGenFunction &CGF) {
                  CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
                  CGF.EmitStopPoint(&S);
                },
                [](CodeGenFunction &) {});
          });
    }
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
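    // (Illustrative: for "#pragma omp taskloop lastprivate(x)", only the task
    // that executed the sequentially last iteration has IsLastIter set, so
    // the copy-out below runs exactly once.)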
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getBeginLoc())));
    }
    CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
                               (*LIP)->getType(), S.getBeginLoc()));
    });
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
                      &Data](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getBeginLoc());
  }
}

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
    const OMPMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
    const OMPMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
    const OMPParallelMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
    const OMPParallelMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
    EmitOMPScanDirective(*SD);
    return;
  }
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    OMPPrivateScope GlobalsScope(CGF);
    if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
      // Capture global firstprivates to avoid crash.
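      // (Illustrative: given a global "int g;" and a directive such as
      //   #pragma omp task firstprivate(g)
      // 'g' has no entry in LocalDeclMap, so its address is registered here
      // before the body is emitted in the simplified form below.)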
      for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
        for (const Expr *Ref : C->varlists()) {
          const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
          if (!DRE)
            continue;
          const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
          if (!VD || VD->hasLocalStorage())
            continue;
          if (!CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(Ref);
            GlobalsScope.addPrivate(
                VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
          }
        }
      }
    }
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      (void)GlobalsScope.Privatize();
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const Expr *E : LD->counters()) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(E);
            GlobalsScope.addPrivate(
                VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
          }
          if (isa<OMPCapturedExprDecl>(VD)) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
          if (!C->getNumForLoops())
            continue;
          for (unsigned I = LD->getCollapsedNumber(),
                        E = C->getLoopNumIterations().size();
               I < E; ++I) {
            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
              // Emit only those that were not explicitly referenced in
              // clauses.
              if (!CGF.LocalDeclMap.count(VD))
                CGF.EmitVarDecl(*VD);
            }
          }
        }
      }
      (void)GlobalsScope.Privatize();
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
    OMPSimdLexicalScope Scope(*this, D);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        *this,
        isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                    : D.getDirectiveKind(),
        CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, D);
}