//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/AtomicOrdering.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

static const VarDecl *getBaseDecl(const Expr *Ref);

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct
/// codegen for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
          return CGF.EmitLValue(&DRE).getAddress(CGF);
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of used expressions from the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const DeclStmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(
                                OrigVD->getType().getNonReferenceType()))),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
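      // Illustrative example: a range-based loop such as
      //   for (auto &X : Vec) ...
      // is desugared onto implicit '__range', '__begin' and '__end'
      // variables; only the init statement and the '__range'/'__end'
      // declarations are emitted here, the loop itself is lowered by the
      // directive codegen.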
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they
    // are not used in simd-only mode.
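    // Illustrative example: for
    //   #pragma omp for simd reduction(inscan, +: X)
    // the temp copy arrays back the scan phase of the worksharing lowering
    // and are never referenced when only the simd lowering is emitted.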
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress(CGF);
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
                  : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress(CGF);
  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only cast arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the
    // outlined function argument type should be uintptr and the value
    // properly cast to uintptr. This is necessary given that the runtime
    // library can only deal with pointers. VLA type sizes are passed to the
    // outlined function in the same way.
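    // Illustrative example: a scalar captured by copy, e.g.
    //   int X = 0;
    //   #pragma omp parallel firstprivate(X)
    // reaches the outlined function as a uintptr-typed argument whose bits
    // are reinterpreted back to 'int' in the prologue below.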
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit the function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit the function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy, we don't need to do anything;
    // just use the value that we got from the arguments.
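    // Illustrative example: for a captured 'int *P' the argument already is
    // the pointer value itself, so no uintptr round-trip is needed.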
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress(CGF);
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress(CGF)}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
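  // With debug info enabled, a "<helper>_debug__" variant with the original
  // parameter types is emitted first, and the helper itself becomes a thin
  // wrapper that performs the uintptr casts and forwards to it.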
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
        return LocalAddrPair.second.second;
      });
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            LV.getAddress(WrapperCGF),
            PI->getType()->getPointerTo(
                LV.getAddress(WrapperCGF).getAddressSpace())));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
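  // The emitted copy takes the shape of a guarded do-while loop
  // (illustrative):
  //   if (dest == dest_end) goto done;
  //   body: CopyGen(dest, src); ++src; ++dest;
  //         if (dest != dest_end) goto body;
  //   done: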
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
      SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element-by-element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with a single array element, so we have to remap the
            // destination and source variables to the corresponding array
            // elements.
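            // Illustrative example: for 'std::string A[N]' the copy
            // expression is written in terms of DestVD/SrcVD; remapping
            // their addresses to the current element lets the same
            // expression emit 'operator=' once per element.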
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
            Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function, e.g. 'omp for', 'omp simd', 'omp distribute', etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit a copy for firstprivate constant variables in target
      // regions that are captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
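          // Illustrative example: 'const int N = 8;' referenced in the
          // region folds to a constant, so no private copy is required.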
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
                const Expr *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform simple memcpy.
                  LValue Dest =
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
                } else {
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(),
                      OriginalLVal.getAddress(*this), Type,
                      [this, VDInit, Init](Address DestElement,
                                           Address SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for single element.
                        setAddrOfLocalVar(VDInit, SrcElement);
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer*/ false);
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
                                               ThisFirstprivateIsLastprivate,
                                               OrigVD, &Lastprivates, IRef]() {
                // Emit private VarDecl with copy init.
                // Remap temp VDInit variable to the address of the original
                // variable (for proper handling of captured global variables).
                setAddrOfLocalVar(VDInit, OriginalAddr);
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                if (ThisFirstprivateIsLastprivate &&
                    Lastprivates[OrigVD->getCanonicalDecl()] ==
                        OMPC_LASTPRIVATE_conditional) {
                  // Create/init special variable for lastprivate conditionals.
                  Address VDAddr =
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                          *this, OrigVD);
                  llvm::Value *V = EmitLoadOfScalar(
                      MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
                                     AlignmentSource::Decl),
                      (*IRef)->getExprLoc());
                  EmitStoreOfScalar(V,
                                    MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                   AlignmentSource::Decl));
                  LocalDeclMap.erase(VD);
                  setAddrOfLocalVar(VD, VDAddr);
                  return VDAddr;
                }
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code
        // with TLS support, the address is passed from the master as a field
        // in the captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        if (CopiedVars.size() == 1) {
          // First, check whether the current thread is the master thread.
          // If it is, there is no need to copy the data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
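          // On the master thread the threadprivate copy and the master
          // variable share an address, so the pointer compare below lets the
          // master skip the copy.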
          auto *MasterAddrInt =
              Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
          auto *PrivateAddrInt =
              Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the
      // end of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                          /*RefersToEnclosingVariableOrCapture=*/
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress(*this);
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C,
                                                              OrigVD]() {
            if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
              Address VDAddr =
                  CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this,
                                                                        OrigVD);
              setAddrOfLocalVar(VD, VDAddr);
              return VDAddr;
            }
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit an implicit barrier if at least one lastprivate conditional is
    // found and this is not simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable of a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
      Data.ReductionOps.append(C->reduction_ops().begin(),
                               C->reduction_ops().end());
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    }
  }
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
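      // Illustrative example: for 'reduction(+: A[0:N])' with a runtime
      // length, the LHS helper maps to the original (shared) storage and the
      // RHS helper to the private copy allocated above.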
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
  if (!Data.ReductionVars.empty()) {
    Data.IsReductionWithTaskMod = true;
    Data.IsWorksharingReduction =
        isOpenMPWorksharingDirective(D.getDirectiveKind());
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
    const Expr *TaskRedRef = nullptr;
    switch (D.getDirectiveKind()) {
    case OMPD_parallel:
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_for:
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_sections:
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for:
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_master:
      TaskRedRef =
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_sections:
      TaskRedRef =
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel:
      TaskRedRef =
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel_for:
      TaskRedRef =
          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_distribute_parallel_for:
      TaskRedRef =
          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_target_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_simd:
    case OMPD_for_simd:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_parallel_for_simd:
    case OMPD_task:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_ordered:
    case OMPD_atomic:
    case OMPD_teams:
    case OMPD_target:
    case OMPD_cancellation_point:
    case OMPD_cancel:
    case OMPD_target_data:
    case OMPD_target_enter_data:
    case OMPD_target_exit_data:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_distribute:
    case OMPD_target_update:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_distribute_simd:
    case OMPD_target_parallel_for_simd:
    case OMPD_target_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams_distribute_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_declare_simd:
    case OMPD_requires:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive with task reductions.");
    }

    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
    EmitVarDecl(*VD);
    EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
                      /*Volatile=*/false, TaskRedRef->getType());
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  bool IsReductionWithTaskMod = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Do not emit for inscan reductions.
1431 if (C->getModifier() == OMPC_REDUCTION_inscan) 1432 continue; 1433 HasAtLeastOneReduction = true; 1434 Privates.append(C->privates().begin(), C->privates().end()); 1435 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 1436 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 1437 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 1438 IsReductionWithTaskMod = 1439 IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task; 1440 } 1441 if (HasAtLeastOneReduction) { 1442 if (IsReductionWithTaskMod) { 1443 CGM.getOpenMPRuntime().emitTaskReductionFini( 1444 *this, D.getBeginLoc(), 1445 isOpenMPWorksharingDirective(D.getDirectiveKind())); 1446 } 1447 bool WithNowait = D.getSingleClause<OMPNowaitClause>() || 1448 isOpenMPParallelDirective(D.getDirectiveKind()) || 1449 ReductionKind == OMPD_simd; 1450 bool SimpleReduction = ReductionKind == OMPD_simd; 1451 // Emit nowait reduction if nowait clause is present or directive is a 1452 // parallel directive (it always has implicit barrier). 1453 CGM.getOpenMPRuntime().emitReduction( 1454 *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps, 1455 {WithNowait, SimpleReduction, ReductionKind}); 1456 } 1457 } 1458 1459 static void emitPostUpdateForReductionClause( 1460 CodeGenFunction &CGF, const OMPExecutableDirective &D, 1461 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { 1462 if (!CGF.HaveInsertPoint()) 1463 return; 1464 llvm::BasicBlock *DoneBB = nullptr; 1465 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1466 if (const Expr *PostUpdate = C->getPostUpdateExpr()) { 1467 if (!DoneBB) { 1468 if (llvm::Value *Cond = CondGen(CGF)) { 1469 // If the first post-update expression is found, emit conditional 1470 // block if it was requested. 1471 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu"); 1472 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done"); 1473 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1474 CGF.EmitBlock(ThenBB); 1475 } 1476 } 1477 CGF.EmitIgnoredExpr(PostUpdate); 1478 } 1479 } 1480 if (DoneBB) 1481 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 1482 } 1483 1484 namespace { 1485 /// Codegen lambda for appending distribute lower and upper bounds to outlined 1486 /// parallel function. 
/// 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace

static void
checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  // Privates should not be analyzed since they are not captured at all.
  // Task reductions may be skipped - tasks are ignored.
  // Firstprivates do not return a value but may be passed by reference - no
  // need to check for an updated lastprivate conditional.
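  // Firstprivates are still collected into PrivateDecls below so that the
  // shared lastprivate-conditional analysis can skip them.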
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
    }
  }
  CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
      CGF, S, PrivateDecls);
}

static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    llvm::Value *NumThreads =
        CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                           /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond);
}

static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
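  // That is, a variable is treated as allocatable only when it names a
  // non-default allocator or carries an explicit allocator expression.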
  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
           !AA->getAllocator());
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
    CodeGenFunction &CGF, const VarDecl *VD) {
  CodeGenModule &CGM = CGF.CGM;
  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!isAllocatableDecl(CVD))
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }

  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);

  llvm::Value *Addr = OMPBuilder.createOMPAlloc(
      CGF.Builder, Size, Allocator,
      getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
  llvm::CallInst *FreeCI =
      OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
  return Address(Addr, Align);
}

Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
    CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
    SourceLocation Loc) {
  CodeGenModule &CGM = CGF.CGM;
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Data =
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
  llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
  std::string Suffix = getNameWithSeparators({"cache", ""});
  llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);

  llvm::CallInst *ThreadPrivateCacheCall =
      OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);

  return Address(ThreadPrivateCacheCall, VDAddr.getAlignment());
}

std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
    ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str().str();
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // Check if we have any if clause associated with the directive.
    llvm::Value *IfCond = nullptr;
    if (const auto *C = S.getSingleClause<OMPIfClause>())
      IfCond = EmitScalarExpr(C->getCondition(),
                              /*IgnoreResultAssign=*/true);

    llvm::Value *NumThreads = nullptr;
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                  /*IgnoreResultAssign=*/true);

    ProcBindKind ProcBind = OMP_PROC_BIND_default;
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
      ProcBind = ProcBindClause->getProcBindKind();

    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    // The cleanup callback that finalizes all variables at the given location,
    // thus calls destructors etc.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    // Privatization callback that performs the appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();

    auto BodyGenCB = [ParallelRegionBodyStmt,
                      this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                            llvm::BasicBlock &ContinuationBB) {
      OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP,
                                                      ContinuationBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt,
                                             CodeGenIP, ContinuationBB);
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    Builder.restoreIP(
        OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
                                  IfCond, NumThreads, ProcBind, S.hasCancel()));
    return;
  }

  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // on propagation of the master thread's values of threadprivate
      // variables to local instances of those variables in all other implicit
      // threads.
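      // For example (illustrative): with 'copyin(T)' on a threadprivate 'T',
      // no thread may read its local 'T' until the master's value has been
      // propagated, hence the forced barrier.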
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

namespace {
/// RAII to handle scopes for loop transformation directives.
class OMPTransformDirectiveScopeRAII {
  OMPLoopScope *Scope = nullptr;
  CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
  CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;

public:
  OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
    if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
      Scope = new OMPLoopScope(CGF, *Dir);
      CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
      CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
    }
  }
  ~OMPTransformDirectiveScopeRAII() {
    if (!Scope)
      return;
    delete CapInfoRAII;
    delete CGSI;
    delete Scope;
  }
};
} // namespace

static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
                     int MaxLevel, int Level = 0) {
  assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
  const Stmt *SimplifiedS = S->IgnoreContainers();
  if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
    PrettyStackTraceLoc CrashInfo(
        CGF.getContext().getSourceManager(), CS->getLBracLoc(),
        "LLVM IR generation of compound statement ('{}')");

    // Keep track of the current cleanup stack depth, including debug scopes.
    CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
    for (const Stmt *CurStmt : CS->body())
      emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
    return;
  }
  if (SimplifiedS == NextLoop) {
    if (auto *Dir = dyn_cast<OMPTileDirective>(SimplifiedS))
      SimplifiedS = Dir->getTransformedStmt();
    if (auto *Dir = dyn_cast<OMPUnrollDirective>(SimplifiedS))
      SimplifiedS = Dir->getTransformedStmt();
    if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
      SimplifiedS = CanonLoop->getLoopStmt();
    if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
      S = For->getBody();
    } else {
      assert(isa<CXXForRangeStmt>(SimplifiedS) &&
             "Expected canonical for loop or range-based for loop.");
      const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
      CGF.EmitStmt(CXXFor->getLoopVarStmt());
      S = CXXFor->getBody();
    }
    if (Level + 1 < MaxLevel) {
      NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
          S, /*TryImperfectlyNestedLoops=*/true);
      emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
      return;
    }
  }
  CGF.EmitStmt(S);
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counter values on the current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(UE);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, no
  // need to generate the code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(UE);
    }
  }

  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  for (const Expr *E : D.finals_conditions()) {
    if (!E)
      continue;
    // Check that the loop counter in a non-rectangular nest fits into the
    // iteration space.
    llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
    EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
                         getProfileCount(D.getBody()));
    EmitBlock(NextBB);
  }

  OMPPrivateScope InscanScope(*this);
  EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
  bool IsInscanRegion = InscanScope.Privatize();
  if (IsInscanRegion) {
    // Need to remember the blocks before and after the scan directive
    // to dispatch them correctly depending on the clause used in
    // this directive, inclusive or exclusive. For an inclusive scan the
    // natural order of the blocks is used; for an exclusive clause the blocks
    // must be executed in reverse order.
    OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
    OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
    // No need to allocate an inscan exit block; in simd mode it is selected in
    // the codegen for the scan directive.
    if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
      OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
    OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
    EmitBranch(OMPScanDispatch);
    EmitBlock(OMPBeforeScanBlock);
  }

  // Emit loop variables for C++ range loops.
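  // (For a range-based loop such as 'for (auto &X : V)', emitBody below emits
  // the declaration of 'X' at each nesting level via getLoopVarStmt().)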
  const Stmt *Body =
      D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
  // Emit loop body.
  emitBody(*this, Body,
           OMPLoopBasedDirective::tryToFindNextInnerLoop(
               Body, /*TryImperfectlyNestedLoops=*/true),
           D.getLoopsNumber());

  // Jump to the dispatcher at the end of the loop body.
  if (IsInscanRegion)
    EmitBranch(OMPScanExitBlock);

  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;

/// Emit a captured statement and return the function as well as its captured
/// closure context.
static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
                                             const CapturedStmt *S) {
  LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
  CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
  std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
      std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
  llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);

  return {F, CapStruct.getPointer(ParentCGF)};
}

/// Emit a call to a previously captured closure.
static llvm::CallInst *
emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
                     llvm::ArrayRef<llvm::Value *> Args) {
  // Append the closure context to the argument list.
  SmallVector<llvm::Value *> EffectiveArgs;
  EffectiveArgs.reserve(Args.size() + 1);
  llvm::append_range(EffectiveArgs, Args);
  EffectiveArgs.push_back(Cap.second);

  return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
}

llvm::CanonicalLoopInfo *
CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
  assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");

  EmitStmt(S);
  assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");

  // The last added loop is the outermost one.
  return OMPLoopNestStack.back();
}

void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
  const Stmt *SyntacticalLoop = S->getLoopStmt();
  if (!getLangOpts().OpenMPIRBuilder) {
    // Ignore if OpenMPIRBuilder is not enabled.
    EmitStmt(SyntacticalLoop);
    return;
  }

  LexicalScope ForScope(*this, S->getSourceRange());

  // Emit init statements. The Distance/LoopVar funcs may reference variable
  // declarations they contain.
  const Stmt *BodyStmt;
  if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
    if (const Stmt *InitStmt = For->getInit())
      EmitStmt(InitStmt);
    BodyStmt = For->getBody();
  } else if (const auto *RangeFor =
                 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
    if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
      EmitStmt(RangeStmt);
    if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
      EmitStmt(BeginStmt);
    if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
      EmitStmt(EndStmt);
    if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
      EmitStmt(LoopVarStmt);
    BodyStmt = RangeFor->getBody();
  } else
    llvm_unreachable("Expected for-stmt or range-based for-stmt");

  // Emit closure for later use. By-value captures will be captured here.
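  // Two closures are emitted: the distance function computes the loop trip
  // count, and the loop-variable function maps a logical iteration number back
  // to the user's loop variable (used in the BodyGen callback below).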
  const CapturedStmt *DistanceFunc = S->getDistanceFunc();
  EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
  const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
  EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);

  // Call the distance function to get the number of iterations of the loop to
  // come.
  QualType LogicalTy = DistanceFunc->getCapturedDecl()
                           ->getParam(0)
                           ->getType()
                           .getNonReferenceType();
  Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
  emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
  llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");

  // Emit the loop structure.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
                           llvm::Value *IndVar) {
    Builder.restoreIP(CodeGenIP);

    // Emit the loop body: Convert the logical iteration number to the loop
    // variable and emit the body.
    const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
    LValue LCVal = EmitLValue(LoopVarRef);
    Address LoopVarAddress = LCVal.getAddress(*this);
    emitCapturedStmtCall(*this, LoopVarClosure,
                         {LoopVarAddress.getPointer(), IndVar});

    RunCleanupsScope BodyScope(*this);
    EmitStmt(BodyStmt);
  };
  llvm::CanonicalLoopInfo *CL =
      OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);

  // Finish up the loop.
  Builder.restoreIP(CL->getAfterIP());
  ForScope.ForceCleanup();

  // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
  OMPLoopNestStack.push_back(CL);
}

void CodeGenFunction::EmitOMPInnerLoop(
    const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();

  // If attributes are attached, push to the basic block with them.
  const auto &OMPED = cast<OMPExecutableDirective>(S);
  const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
  const Stmt *SS = ICS->getCapturedStmt();
  const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
  OMPLoopNestStack.clear();
  if (AS)
    LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
                   AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
                   SourceLocToDebugLoc(R.getEnd()));
  else
    LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                   SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
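  // The resulting control flow is, roughly:
  //   omp.inner.for.cond:
  //     br i1 %cond, label %omp.inner.for.body, label %omp.inner.for.end
  //   omp.inner.for.body:
  //     ... ; body, then omp.inner.for.inc and a back-edge to the condition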
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Init : C->inits()) {
      HasLinears = true;
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (const auto *Ref =
              dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else {
        EmitVarDecl(*VD);
      }
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}

void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (const Expr *F : C->finals()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit a conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    llvm::APInt ClauseAlignment(64, 0);
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
      auto *AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = AlignmentCI->getValue();
    }
    for (const Expr *E : Clause->varlists()) {
      llvm::APInt Alignment(ClauseAlignment);
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || Alignment.isPowerOf2()) &&
             "alignment is not a power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.emitAlignmentAssumption(
            PtrValue, E, /*No second loc needed*/ SourceLocation(),
            llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
      }
    }
  }
}

void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (const Expr *E : S.counters()) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Emit var without initialization.
    AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
    EmitAutoVarCleanups(VarEmission);
    LocalDeclMap.erase(PrivateVD);
    (void)LoopScope.addPrivate(VD, [&VarEmission]() {
      return VarEmission.getAllocatedAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress(*this);
      });
    } else {
      (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
        return VarEmission.getAllocatedAddress();
      });
    }
    ++I;
  }
  // Privatize extra loop counters used in loops for ordered(n) clauses.
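  // For example (illustrative): with '#pragma omp for ordered(2)' where only
  // one loop is associated with the construct, the second loop's counter is
  // also privatized here.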
  for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
    if (!C->getNumForLoops())
      continue;
    for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
         I < E; ++I) {
      const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
      const auto *VD = cast<VarDecl>(DRE->getDecl());
      // Override only those variables that can be captured to avoid re-emission
      // of the variables declared within the loops.
      if (DRE->refersToEnclosingVariableOrCapture()) {
        (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
          return CreateMemTemp(DRE->getType(), VD->getName());
        });
      }
    }
  }
}

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (const Expr *I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Create temp loop control variables with their init values to support
  // non-rectangular loops.
  CodeGenFunction::OMPMapVars PreCondVars;
  for (const Expr *E : S.dependent_counters()) {
    if (!E)
      continue;
    assert(!E->getType().getNonReferenceType()->isRecordType() &&
           "dependent counter must not be an iterator.");
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Address CounterAddr =
        CGF.CreateMemTemp(VD->getType().getNonReferenceType());
    (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
  }
  (void)PreCondVars.apply(CGF);
  for (const Expr *E : S.dependent_inits()) {
    if (!E)
      continue;
    CGF.EmitIgnoredExpr(E);
  }
  // Check that the loop is executed at least once.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
  PreCondVars.restore(CGF);
}

void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (const Expr *E : C->varlists()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else {
        EmitVarDecl(*PrivateVD);
      }
      ++CurPrivate;
    }
  }
}

static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(/*Enable=*/false);
  }
}

void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
  // Walk the clauses and process simdlen/safelen, order, and inscan
  // reductions.
  LoopStack.setParallel(/*Enable=*/true);
  LoopStack.setVectorizeEnable();
  emitSimdlenSafelenClause(*this, D);
  if (const auto *C = D.getSingleClause<OMPOrderClause>())
    if (C->getKind() == OMPC_ORDER_concurrent)
      LoopStack.setParallel(/*Enable=*/true);
  if ((D.getDirectiveKind() == OMPD_simd ||
       (getLangOpts().OpenMPSimd &&
        isOpenMPSimdDirective(D.getDirectiveKind()))) &&
      llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
                   [](const OMPReductionClause *C) {
                     return C->getModifier() == OMPC_REDUCTION_inscan;
                   }))
    // Disable parallel access in case of prefix sum.
    LoopStack.setParallel(/*Enable=*/false);
}

void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (const Expr *F : D.finals()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit a conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED) {
        OrigAddr =
            EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
      } else {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress(*this);
      }
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}

/// Emit a helper variable and return the corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
                               const RegionCodeGenTy &SimdInitGen,
                               const RegionCodeGenTy &BodyCodeGen) {
  auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    SimdInitGen(CGF);

    BodyCodeGen(CGF);
  };
  auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);

    BodyCodeGen(CGF);
  };
  const Expr *IfCond = nullptr;
  if (isOpenMPSimdDirective(S.getDirectiveKind())) {
    for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
      if (CGF.getLangOpts().OpenMP >= 50 &&
          (C->getNameModifier() == OMPD_unknown ||
           C->getNameModifier() == OMPD_simd)) {
        IfCond = C->getCondition();
        break;
      }
    }
  }
  if (IfCond) {
    CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
         "Expected simd directive");
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }
  //
  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
      isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
      isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
  }

  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
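  // For example (illustrative): a loop like 'for (int i = 0; i < 0; ++i)' has
  // a constant-false precondition, so the whole simd region is skipped.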
  bool CondConstant;
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
    if (!CondConstant)
      return;
  } else {
    llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
    ContBlock = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(ThenBlock);
    CGF.incrementProfileCounter(&S);
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }

  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
        CGF, S, CGF.EmitLValue(S.getIterationVariable()));
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    emitCommonSimdLoop(
        CGF, S,
        [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPSimdInit(S);
        },
        [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPInnerLoop(
              S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
              [&S](CodeGenFunction &CGF) {
                emitOMPLoopBodyWithStopPoint(CGF, S,
                                             CodeGenFunction::JumpDest());
              },
              [](CodeGenFunction &) {});
        });
    CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(CGF, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
  OMPFirstScanLoop = true;
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII TileScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}

void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
  // This function is only called if the unrolled loop is not consumed by any
  // other loop-associated construct. Such a loop-associated construct will have
  // used the transformed AST.

  // Set the unroll metadata for the next emitted loop.
  LoopStack.setUnrollState(LoopAttributes::Enable);

  if (S.hasClausesOfKind<OMPFullClause>()) {
    LoopStack.setUnrollState(LoopAttributes::Full);
  } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
    if (Expr *FactorExpr = PartialClause->getFactor()) {
      uint64_t Factor =
          FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
      assert(Factor >= 1 && "Only positive factors are valid");
      LoopStack.setUnrollCount(Factor);
    }
  }

  EmitStmt(S.getAssociatedStmt());
}

void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();
  OMPLoopNestStack.clear();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop-sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    BoolCondVal =
        RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of a static schedule, we have already calculated
  // a new LB for the loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  emitCommonSimdLoop(
      *this, S,
      [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
        // Generate !llvm.loop.parallel metadata for loads and stores for loops
        // with dynamic/guided scheduling and without an ordered clause.
        if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
          CGF.LoopStack.setParallel(!IsMonotonic);
          if (const auto *C = S.getSingleClause<OMPOrderClause>())
            if (C->getKind() == OMPC_ORDER_concurrent)
              CGF.LoopStack.setParallel(/*Enable=*/true);
        } else {
          CGF.EmitOMPSimdInit(S);
        }
      },
      [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
       &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
        SourceLocation Loc = S.getBeginLoc();
        // When 'distribute' is not combined with a 'for':
        //   while (idx <= UB) { BODY; ++idx; }
        // When 'distribute' is combined with a 'for'
        // (e.g. 'distribute parallel for'):
        //   while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        CGF.EmitOMPInnerLoop(
            S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
            [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
              CodeGenLoop(CGF, S, LoopExit);
            },
            [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
              CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
            });
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  OMPLoopNestStack.clear();
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}

void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered =
      Ordered || RT.isDynamic(ScheduleKind.Schedule);

  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations.
  // When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
        CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
                                                              LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DispatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);
  }

  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}

static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}

void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as in EmitOMPForOuterLoop, except that the schedule cannot
  // be dynamic.

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);

  // For combined 'distribute' and 'for', the increment expression of the
  // distribute is stored in DistInc. For 'distribute' alone, it is in Inc.
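  // For example (illustrative), with '#pragma omp distribute parallel for' the
  // chunk assigned to a team advances by DistInc, while the inner worksharing
  // loop iterates within that chunk using the plain increment.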
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();

  // This routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive.
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}

static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into the
  // current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevUpperBoundVariable()->getExprLoc());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}

/// If the 'for' loop has a dispatch schedule (e.g. dynamic or guided), we
/// we need to use the LB and UB expressions generated by the worksharing
/// code generation support, whereas in non-combined situations we would
/// just emit 0 and the LastIteration expression.
/// This function is necessary due to the difference in the LB and UB
/// types for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // When implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop is not
  // normalized, as each team only executes its own assigned distribute chunk.
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal =
      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  llvm::Value *UBVal =
      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  return {LBVal, UBVal};
}

static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  llvm::Value *LBCast =
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
                                CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));

  llvm::Value *UBCast =
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
                                CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}

static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &Action) {
    Action.Enter(CGF);
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S,
      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}

void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  // Emit the SPMD target simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace

bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iteration count variable.
  // If it is not a variable, Sema decided to calculate the iteration count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit the calculation of the iteration count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    bool Ordered = false;
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
      else
        Ordered = true;
    }

    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit an implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
          *this, S, EmitLValue(S.getIterationVariable()));
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the loop schedule kind and chunk.
      const Expr *ChunkExpr = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        ChunkExpr = C->getChunkSize();
      } else {
        // Default behaviour for the schedule clause.
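        // (When the clause is absent, the OpenMP spec leaves the choice to the
        // implementation-defined def-sched-var; device-specific runtimes may
        // override this hook, e.g. to prefer a static schedule on GPUs.)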
        CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
            *this, S, ScheduleKind.Schedule, ChunkExpr);
      }
      bool HasChunkSizeOne = false;
      llvm::Value *Chunk = nullptr;
      if (ChunkExpr) {
        Chunk = EmitScalarExpr(ChunkExpr);
        Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
                                     S.getIterationVariable()->getType(),
                                     S.getBeginLoc());
        Expr::EvalResult Result;
        if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
          llvm::APSInt EvaluatedChunk = Result.Val.getInt();
          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      bool StaticChunkedOne =
          RT.isStaticChunked(ScheduleKind.Schedule,
                             /*Chunked=*/Chunk != nullptr) &&
          HasChunkSizeOne &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      bool IsMonotonic =
          Ordered ||
          (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
           !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
          ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
          ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
      if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 /*Chunked=*/Chunk != nullptr) ||
           StaticChunkedOne) &&
          !Ordered) {
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind())) {
                CGF.EmitOMPSimdInit(S);
              } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
                if (C->getKind() == OMPC_ORDER_concurrent)
                  CGF.LoopStack.setParallel(/*Enable=*/true);
              }
            },
            [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
             &S, ScheduleKind, LoopExit,
             &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
              // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
              // When no chunk_size is specified, the iteration space is divided
              // into chunks that are approximately equal in size, and at most
              // one chunk is distributed to each thread. Note that the size of
              // the chunks is unspecified in this case.
              CGOpenMPRuntime::StaticRTInput StaticInit(
                  IVSize, IVSigned, Ordered, IL.getAddress(CGF),
                  LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
                  StaticChunkedOne ? Chunk : nullptr);
              CGF.CGM.getOpenMPRuntime().emitForStaticInit(
                  CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
                  StaticInit);
              // UB = min(UB, GlobalUB);
              if (!StaticChunkedOne)
                CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
              // IV = LB;
              CGF.EmitIgnoredExpr(S.getInit());
              // For an unchunked static schedule generate:
              //
              //   while (idx <= UB) {
              //     BODY;
              //     ++idx;
              //   }
              //
              // For a static schedule with chunk one:
              //
              //   while (IV <= PrevUB) {
              //     BODY;
              //     IV += ST;
              //   }
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(),
                  StaticChunkedOne ? S.getCombinedParForInDistCond()
                                   : S.getCond(),
                  StaticChunkedOne ? S.getDistInc() : S.getInc(),
                  [&S, LoopExit](CodeGenFunction &CGF) {
                    emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
                  },
                  [](CodeGenFunction &) {});
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                         S.getDirectiveKind());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // the runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments(
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            IL.getAddress(*this), Chunk, EUB);
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [IL, &S](CodeGenFunction &CGF) {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
          });
      // Emit the final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
    }
    EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, /*IsFinished=*/true);
    }
  }
  return HasLastprivateClause;
}

/// The following two functions generate expressions for the loop lower
/// and upper bounds in case of static and dynamic (dispatch) schedule
/// of the associated 'for' or 'distribute' loop.
static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  const auto &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  return {LB, UB};
}

/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
/// consider the lower and upper bound expressions generated by the
/// worksharing loop support; instead we use 0 and the iteration space size
/// as constants.
static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                          Address LB, Address UB) {
  const auto &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
  return {LBVal, UBVal};
}

/// Emits internal temp array declarations for the directive with inscan
/// reductions.
/// The code is the following:
/// \code
/// size num_iters = <num_iters>;
/// <type> buffer[num_iters];
/// \endcode
static void emitScanBasedDirectiveDecls(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
  }
  {
    // Emit buffers for each reduction variable.
    // ReductionCodeGen is required to correctly emit the code for array
    // reductions.
    ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
    unsigned Count = 0;
    auto *ITA = CopyArrayTemps.begin();
    for (const Expr *IRef : Privates) {
      const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      // Emit variably modified arrays, used for arrays/array sections
      // reductions.
      if (PrivateVD->getType()->isVariablyModifiedType()) {
        RedCG.emitSharedOrigLValue(CGF, Count);
        RedCG.emitAggregateType(CGF, Count);
      }
      CodeGenFunction::OpaqueValueMapping DimMapping(
          CGF,
          cast<OpaqueValueExpr>(
              cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
                  ->getSizeExpr()),
          RValue::get(OMPScanNumIterations));
      // Emit the temp buffer.
      CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
      ++ITA;
      ++Count;
    }
  }
}

/// Emits the code for the directive with inscan reductions.
/// The code is the following:
/// \code
/// #pragma omp ...
/// for (i: 0..<num_iters>) {
///   <input phase>;
///   buffer[i] = red;
/// }
/// #pragma omp master // in parallel region
/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
///   for (size i = num_iters - 1; i >= pow(2, k); --i)
///     buffer[i] op= buffer[i-pow(2,k)];
/// #pragma omp barrier // in parallel region
/// #pragma omp ...
/// for (0..<num_iters>) {
///   red = InclusiveScan ? buffer[i] : buffer[i-1];
///   <scan phase>;
/// }
/// \endcode
static void emitScanBasedDirective(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
    llvm::function_ref<void(CodeGenFunction &)> FirstGen,
    llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
  {
    // Emit loop with input phase:
    // #pragma omp ...
    // for (i: 0..<num_iters>) {
    //   <input phase>;
    //   buffer[i] = red;
    // }
    CGF.OMPFirstScanLoop = true;
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    FirstGen(CGF);
  }
  // #pragma omp barrier // in parallel region
  auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
                    &ReductionOps,
                    &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Emit prefix reduction:
    // #pragma omp master // in parallel region
    // for (int k = 0; k != ceil(log2(n)); ++k)
    llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
    llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
    llvm::Function *F =
        CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
    llvm::Value *Arg =
        CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
    llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
    F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
    LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
    LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
    llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
        OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
    auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
    CGF.EmitBlock(LoopBB);
    auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
    // size pow2k = 1;
    auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
    Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
    // for (size i = n - 1; i >= 2 ^ k; --i)
    //   tmp[i] op= tmp[i-pow2k];
    llvm::BasicBlock *InnerLoopBB =
        CGF.createBasicBlock("omp.inner.log.scan.body");
    llvm::BasicBlock *InnerExitBB =
        CGF.createBasicBlock("omp.inner.log.scan.exit");
    llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
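    // Inner loop body: privatize the LHS/RHS pair so they point at buffer[i]
    // and buffer[i-pow2k], then apply the reduction operation; after pass k,
    // element i roughly accumulates the 2^k elements ending at i.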
    CGF.EmitBlock(InnerLoopBB);
    auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    IVal->addIncoming(NMin1, LoopBB);
    {
      CodeGenFunction::OMPPrivateScope PrivScope(CGF);
      auto *ILHS = LHSs.begin();
      auto *IRHS = RHSs.begin();
      for (const Expr *CopyArrayElem : CopyArrayElems) {
        const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
        Address LHSAddr = Address::invalid();
        {
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(IVal));
          LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
        }
        PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
        Address RHSAddr = Address::invalid();
        {
          llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(OffsetIVal));
          RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
        }
        PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
        ++ILHS;
        ++IRHS;
      }
      PrivScope.Privatize();
      CGF.CGM.getOpenMPRuntime().emitReduction(
          CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
    }
    llvm::Value *NextIVal =
        CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
    IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
    CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
    CGF.EmitBlock(InnerExitBB);
    llvm::Value *Next =
        CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
    Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
    // pow2k <<= 1;
    llvm::Value *NextPow2K =
        CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
    Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
    llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
    CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
    auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
    CGF.EmitBlock(ExitBB);
  };
  if (isOpenMPParallelDirective(S.getDirectiveKind())) {
    CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  } else {
    RegionCodeGenTy RCG(CodeGen);
    RCG(CGF);
  }

  CGF.OMPFirstScanLoop = false;
  SecondGen(CGF);
}

static bool emitWorksharingDirective(CodeGenFunction &CGF,
                                     const OMPLoopDirective &S,
                                     bool HasCancel) {
  bool HasLastprivates;
  if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                   [](const OMPReductionClause *C) {
                     return C->getModifier() == OMPC_REDUCTION_inscan;
                   })) {
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(S.getNumIterations());
    };
    const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(
          CGF, S.getDirectiveKind(), HasCancel);
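      // First pass: emit the loop with only the input phase active; it fills
      // the temporary scan buffer, and the returned lastprivate flag is
      // discarded.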
      (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                       emitForLoopBounds,
                                       emitDispatchForLoopBounds);
      // Emit an implicit barrier at the end.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
                                                 OMPD_for);
    };
    const auto &&SecondGen = [&S, HasCancel,
                              &HasLastprivates](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(
          CGF, S.getDirectiveKind(), HasCancel);
      HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                   emitForLoopBounds,
                                                   emitDispatchForLoopBounds);
    };
    if (!isOpenMPParallelDirective(S.getDirectiveKind()))
      emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
    emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
  } else {
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  }
  return HasLastprivates;
}

static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
  if (S.hasCancel())
    return false;
  for (OMPClause *C : S.clauses())
    if (!isa<OMPNowaitClause>(C))
      return false;

  return true;
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  bool UseOMPIRBuilder =
      CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
  auto &&CodeGen = [this, &S, &HasLastprivates,
                    UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
    // Use the OpenMPIRBuilder if enabled.
    if (UseOMPIRBuilder) {
      // Emit the associated statement and get its loop representation.
      const Stmt *Inner = S.getRawStmt();
      llvm::CanonicalLoopInfo *CLI =
          EmitOMPCollapsedCanonicalLoopNest(Inner, 1);

      bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();
      llvm::OpenMPIRBuilder &OMPBuilder =
          CGM.getOpenMPRuntime().getOMPBuilder();
      llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
          AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
      OMPBuilder.createWorkshareLoop(Builder, CLI, AllocaIP, NeedsBarrier);
      return;
    }

    HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }

  if (!UseOMPIRBuilder) {
    // Emit an implicit barrier at the end.
    if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
  return LVal;
}

void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, CapturedStmt, CS,
                    &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
    const ASTContext &C = CGF.getContext();
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
                                         ? CGF.Builder.getInt32(CS->size() - 1)
                                         : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator *Cond = BinaryOperator::Create(
        C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
        S.getBeginLoc(), FPOptionsOverride());
    // Increment for loop counter.
    UnaryOperator *Inc = UnaryOperator::Create(
        C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
        S.getBeginLoc(), true, FPOptionsOverride());
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      // case 0:
      //   <SectionStmt[0]>;
      //   break;
      // ...
      // case <NumSection> - 1:
      //   <SectionStmt[<NumSection> - 1]>;
      //   break;
      // }
      // .omp.sections.exit:
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      llvm::SwitchInst *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (const Stmt *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(CapturedStmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // on initialization of firstprivate variables and post-update of
      // lastprivate variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
        LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });

    // Emit the final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit a barrier for the lastprivates only if the 'sections' directive has
  // a 'nowait' clause. Otherwise the barrier will be generated by the codegen
  // for the directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit an implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_unknown);
  }
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
    using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    const CapturedStmt *ICS = S.getInnermostCapturedStmt();
    const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
    const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
    llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
    if (CS) {
      for (const Stmt *SubStmt : CS->children()) {
        auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
                                         InsertPointTy CodeGenIP,
                                         llvm::BasicBlock &FiniBB) {
          OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP,
                                                         FiniBB);
          OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SubStmt, CodeGenIP,
                                                 FiniBB);
        };
        SectionCBVector.push_back(SectionCB);
      }
    } else {
      auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
                                            InsertPointTy CodeGenIP,
                                            llvm::BasicBlock &FiniBB) {
        OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
        OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CapturedStmt, CodeGenIP,
                                               FiniBB);
      };
      SectionCBVector.push_back(SectionCB);
    }

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
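      // (A sketch of the eventual behaviour, hypothetical per the TODO above:
      // a "private" variable would instead get a fresh stack slot created at
      // AllocaIP and ReplVal would be pointed at that slot; "firstprivate"
      // would additionally copy the original value into it.)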
      ReplVal = &Val;

      return CodeGenIP;
    };

    CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    Builder.restoreIP(OMPBuilder.createSections(
        Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
        S.getSingleClause<OMPNowaitClause>()));
    return;
  }
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_sections);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                   InsertPointTy CodeGenIP,
                                                   llvm::BasicBlock &FiniBB) {
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SectionRegionBodyStmt,
                                             CodeGenIP, FiniBB);
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  EmitStmt(S.getAssociatedStmt());
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination> = <source> expressions).
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid data races on firstprivate
  // init, or if no 'nowait' clause was specified and no 'copyprivate' clause).
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getBeginLoc(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getRawStmt());
  };
  CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP,
                                                  llvm::BasicBlock &FiniBB) {
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt,
                                             CodeGenIP, FiniBB);
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  emitMaster(*this, S);
}

static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getRawStmt());
  };
  Expr *Filter = nullptr;
  if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
    Filter = FilterClause->getThreadID();
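  // Per OpenMP 5.1, without a 'filter' clause the masked region is executed
  // by the primary thread; the runtime treats a null filter as thread id 0.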
  CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
                                              Filter);
}

void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
    const Expr *Filter = nullptr;
    if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
      Filter = FilterClause->getThreadID();
    llvm::Value *FilterVal = Filter
                                 ? EmitScalarExpr(Filter, CGM.Int32Ty)
                                 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP,
                                                  llvm::BasicBlock &FiniBB) {
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MaskedRegionBodyStmt,
                                             CodeGenIP, FiniBB);
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(
        OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  emitMasked(*this, S);
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
    const Expr *Hint = nullptr;
    if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
      Hint = HintClause->getHint();

    // TODO: This is slightly different from what's currently being done in
    // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
    // about typing is final.
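    // The hint is one of the omp_sync_hint_* constants from <omp.h>
    // (omp_sync_hint_none == 0, uncontended == 1, contended == 2,
    // nonspeculative == 4, speculative == 8); it is passed to the runtime
    // as a plain 32-bit integer here.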
    llvm::Value *HintInst = nullptr;
    if (Hint)
      HintInst =
          Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                    InsertPointTy CodeGenIP,
                                                    llvm::BasicBlock &FiniBB) {
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt,
                                             CodeGenIP, FiniBB);
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createCritical(
        Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
        HintInst));

    return;
  }

  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getAssociatedStmt());
  };
  const Expr *Hint = nullptr;
  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getBeginLoc(), Hint);
}

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' with a 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    (void)emitWorksharingDirective(CGF, S, S.hasCancel());
  };
  {
    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                     [](const OMPReductionClause *C) {
                       return C->getModifier() == OMPC_REDUCTION_inscan;
                     })) {
      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
        CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
        CGCapturedStmtInfo CGSI(CR_OpenMP);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
        OMPLoopScope LoopScope(CGF, S);
        return CGF.EmitScalarExpr(S.getNumIterations());
      };
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
    }
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' with a 'for simd' directive.
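  // Roughly (illustration only), the emitted structure is:
  //   __kmpc_fork_call(&loc, ..., outlined);  // the 'parallel' part
  // where 'outlined' runs the 'for simd' worksharing loop with its body
  // carrying llvm.loop vectorization metadata.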
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                     [](const OMPReductionClause *C) {
                       return C->getModifier() == OMPC_REDUCTION_inscan;
                     })) {
      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
        CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
        CGCapturedStmtInfo CGSI(CR_OpenMP);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
        OMPLoopScope LoopScope(CGF, S);
        return CGF.EmitScalarExpr(S.getNumIterations());
      };
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
    }
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelMasterDirective(
    const OMPParallelMasterDirective &S) {
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' with a 'master' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // on propagation of the master thread's values of threadprivate
      // variables to the local instances of those variables in all other
      // implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    emitMaster(CGF, S);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' with a 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitSections(S);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

namespace {
/// Get the list of variables declared in the context of the untied tasks.
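/// Such locals must be privatized in the task's data block: after a task
/// scheduling point an untied task may resume on a different thread, so its
/// locals cannot live on any one thread's stack.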
class CheckVarsEscapingUntiedTaskDeclContext final
    : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
  llvm::SmallVector<const VarDecl *, 4> PrivateDecls;

public:
  explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
  virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
  void VisitDeclStmt(const DeclStmt *S) {
    if (!S)
      return;
    // We need to privatize only local vars; static locals can be processed
    // as is.
    for (const Decl *D : S->decls()) {
      if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
        if (VD->hasLocalStorage())
          PrivateDecls.push_back(VD);
    }
  }
  void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
  void VisitCapturedStmt(const CapturedStmt *) {}
  void VisitLambdaExpr(const LambdaExpr *) {}
  void VisitBlockExpr(const BlockExpr *) {}
  void VisitStmt(const Stmt *S) {
    if (!S)
      return;
    for (const Stmt *Child : S->children())
      if (Child)
        Visit(Child);
  }

  /// Returns the list of privatized variables.
  ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
};
} // anonymous namespace

void CodeGenFunction::EmitOMPTaskBasedDirective(
    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
    OMPTaskDataTy &Data) {
  // Emit the outlined function for the task construct.
  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  // Check if the task is final.
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    const Expr *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has a 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    const Expr *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >= 0 for untied tasks).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get the list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
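  // Firstprivates on a task are captured by value when the task is created;
  // the copies live in the task's privates block and are initialized there,
  // which is why the init expressions are recorded alongside the variables
  // below.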
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
                         cast<DeclRefExpr>(*IRef)));
      ++IRef;
      ++ID;
    }
  }
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
    Data.ReductionOps.append(C->reduction_ops().begin(),
                             C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getBeginLoc(), LHSs, RHSs, Data);
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    OMPTaskDataTy::DependData &DD =
        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
  }
  // Get list of local vars for untied tasks.
  if (!Data.Tied) {
    CheckVarsEscapingUntiedTaskDeclContext Checker;
    Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
    Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
                              Checker.getPrivateDecls().end());
  }
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
                    CapturedRegion](CodeGenFunction &CGF,
                                    PrePostActionTy &Action) {
    llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                    std::pair<Address, Address>>
        UntiedLocalVars;
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
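      // The outlined task entry receives the privates block and a copy
      // function as implicit parameters (params 2 and 3 above); the call to
      // copy_fn(privates, &p1, &p2, ...) emitted below fills each pointer
      // temporary with the address of the corresponding privatized copy
      // inside the task's private data block.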
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(PrivatesPtr);
      ParamTypes.push_back(PrivatesPtr->getType());
      for (const Expr *E : Data.PrivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        FirstprivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const Expr *E : Data.LastprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const VarDecl *VD : Data.PrivateLocals) {
        QualType Ty = VD->getType().getNonReferenceType();
        if (VD->getType()->isLValueReferenceType())
          Ty = CGF.getContext().getPointerType(Ty);
        if (isAllocatableDecl(VD))
          Ty = CGF.getContext().getPointerType(Ty);
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
        auto Result = UntiedLocalVars.insert(
            std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
        // If the key already exists, update the entry in place.
        if (!Result.second)
          *Result.first = std::make_pair(
              VD, std::make_pair(PrivatePtr, Address::invalid()));
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
                                               ParamTypes, /*isVarArg=*/false);
      CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CopyFn, CopyFnTy->getPointerTo());
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : LastprivateDstsOrigs) {
        const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        Pair.second->getType(), VK_LValue,
                        Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress(CGF);
        });
      }
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
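      // For example, in
      //   #pragma omp task untied
      //   { int local = 0; ... /* task may switch here */ use(local); }
      // 'local' must survive a task switch, so for untied tasks such locals
      // live in runtime-managed storage and are reached through the pointers
      // mapped below rather than through an ordinary stack slot.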
      // Adjust the mapping for internal locals by mapping the actual memory
      // instead of a pointer to that memory.
      for (auto &Pair : UntiedLocalVars) {
        if (isAllocatableDecl(Pair.first)) {
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
          Address Replacement(Ptr, CGF.getPointerAlign());
          Pair.second.first = Replacement;
          Ptr = CGF.Builder.CreateLoad(Replacement);
          Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first));
          Pair.second.second = Replacement;
        } else {
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
          Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first));
          Pair.second.first = Replacement;
        }
      }
    }
    if (Data.Reductions) {
      OMPPrivateScope FirstprivateScope(CGF);
      for (const auto &Pair : FirstprivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        FirstprivateScope.addPrivate(Pair.first,
                                     [Replacement]() { return Replacement; });
      }
      (void)FirstprivateScope.Privatize();
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
                             Data.ReductionCopies, Data.ReductionOps);
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit the required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement =
            Address(CGF.EmitScalarConversion(
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                        CGF.getContext().getPointerType(
                            Data.ReductionCopies[Cnt]->getType()),
                        Data.ReductionCopies[Cnt]->getExprLoc()),
                    Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                         [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const Expr *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
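    // For illustration, in
    //   #pragma omp taskgroup task_reduction(+ : x)
    //   {
    //     #pragma omp task in_reduction(+ : x)
    //     x += f();
    //   }
    // each task obtains the address of its private copy of 'x' from the
    // enclosing taskgroup's reduction descriptor, which is why the
    // descriptors gathered above must be privatized before the items.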
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate and
        // privatized already during processing of the firstprivates.
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit the required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        llvm::Value *ReductionsPtr;
        if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
          ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
                                               TRExpr->getExprLoc());
        } else {
          ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
        }
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                InRedPrivs[Cnt]->getExprLoc()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
                              [Replacement]() { return Replacement; });
      }
    }
    (void)InRedScope.Privatize();

    CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
                                                             UntiedLocalVars);
    Action.Enter(CGF);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S, llvm::None,
                        !isOpenMPParallelDirective(S.getDirectiveKind()) &&
                            !isOpenMPSimdDirective(S.getDirectiveKind()));
  TaskGen(*this, OutlinedFn, Data);
}
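// For reference, the outlined task entry produced above looks roughly like
// (simplified; the exact prototype comes from the captured decl):
//   i32 .omp_task_entry.(i32 gtid, kmp_task_t *task) {
//     copy_fn(task->privates, &p1, &p2, ...); // remap privates
//     <task body using p1, p2, ...>;
//   }
// and TaskGen is responsible for handing it to the runtime; for a plain
// 'task' directive this ends up in a __kmpc_omp_task(...) call.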
static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                           ImplicitParamDecl::Other);
  auto *OrigRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                              ImplicitParamDecl::Other);
  auto *PrivateRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  QualType ElemType = C.getBaseElementType(Ty);
  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
                                           ImplicitParamDecl::Other);
  auto *InitRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
  PrivateVD->setInitStyle(VarDecl::CInit);
  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
                                              InitRef, /*BasePath=*/nullptr,
                                              VK_PRValue, FPOptionsOverride()));
  Data.FirstprivateVars.emplace_back(OrigRef);
  Data.FirstprivateCopies.emplace_back(PrivateRef);
  Data.FirstprivateInits.emplace_back(InitRef);
  return OrigVD;
}

void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
    OMPTargetDataInfo &InputInfo) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  OMPTaskDataTy Data;
  // The task is not final.
  Data.Final.setInt(/*IntVal=*/false);
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      Data.FirstprivateVars.push_back(*IRef);
      Data.FirstprivateCopies.push_back(IInit);
      Data.FirstprivateInits.push_back(*IElemInitRef);
      ++IRef;
      ++IElemInitRef;
    }
  }
  OMPPrivateScope TargetScope(*this);
  VarDecl *BPVD = nullptr;
  VarDecl *PVD = nullptr;
  VarDecl *SVD = nullptr;
  VarDecl *MVD = nullptr;
  if (InputInfo.NumberOfTargetItems > 0) {
    auto *CD = CapturedDecl::Create(
        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
    QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
        getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    BPVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
    PVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
    QualType SizesType = getContext().getConstantArrayType(
        getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
        ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
                                            S.getBeginLoc());
    TargetScope.addPrivate(
        BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
    TargetScope.addPrivate(PVD,
                           [&InputInfo]() { return InputInfo.PointersArray; });
    TargetScope.addPrivate(SVD,
                           [&InputInfo]() { return InputInfo.SizesArray; });
    // If there is no user-defined mapper, the mapper array will be nullptr. In
    // this case, we don't need to privatize it.
    if (!dyn_cast_or_null<llvm::ConstantPointerNull>(
            InputInfo.MappersArray.getPointer())) {
      MVD = createImplicitFirstprivateForType(
          getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
      TargetScope.addPrivate(MVD,
                             [&InputInfo]() { return InputInfo.MappersArray; });
    }
  }
  (void)TargetScope.Privatize();
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    OMPTaskDataTy::DependData &DD =
        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
  }
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.FirstprivateVars.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(PrivatesPtr);
      ParamTypes.push_back(PrivatesPtr->getType());
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
                                               ParamTypes, /*isVarArg=*/false);
      CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CopyFn, CopyFnTy->getPointerTo());
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    if (InputInfo.NumberOfTargetItems > 0) {
      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
      // If MVD is nullptr, the mapper array is not privatized.
      if (MVD)
        InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
            CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
    }

    Action.Enter(CGF);
    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
      Data.NumberOfParts);
  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
  IntegerLiteral IfCond(getContext(), TrueOrFalse,
                        getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
                        SourceLocation());

  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
                                      SharedsTy, CapturedStruct, &IfCond, Data);
}
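// For illustration, a deferred target region such as
//   #pragma omp target nowait depend(in : x) map(to : a)
//   { ... }
// is emitted through the path above: the offloading base-pointer, pointer,
// size (and, if present, mapper) arrays are turned into implicit
// firstprivates of a wrapping task, so the deferred task owns a stable copy
// of them when it eventually runs.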
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit a tied or untied task.
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
        Data.ReductionOps.append(C->reduction_ops().begin(),
                                 C->reduction_ops().end());
        LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
        RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
                                                           LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
}
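// The standalone synchronization directives above map directly onto runtime
// entry points; roughly:
//   #pragma omp taskyield  ->  __kmpc_omp_taskyield(...)
//   #pragma omp barrier    ->  __kmpc_barrier(...)
//   #pragma omp taskwait   ->  __kmpc_omp_taskwait(...)
// while 'taskgroup' brackets its region with begin/end runtime calls and,
// when task_reduction is present, stores the reduction descriptor in the
// compiler-generated variable referenced via getReductionRef().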
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
                                ? llvm::AtomicOrdering::NotAtomic
                                : llvm::AtomicOrdering::AcquireRelease;
  CGM.getOpenMPRuntime().emitFlush(
      *this,
      [&S]() -> ArrayRef<const Expr *> {
        if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
          return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                    FlushClause->varlist_end());
        return llvm::None;
      }(),
      S.getBeginLoc(), AO);
}

void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
  const auto *DO = S.getSingleClause<OMPDepobjClause>();
  LValue DOLVal = EmitLValue(DO->getDepobj());
  if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
    OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
                                           DC->getModifier());
    Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
    Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
        *this, Dependencies, DC->getBeginLoc());
    EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
    return;
  }
  if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
    CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
    return;
  }
  if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
    CGM.getOpenMPRuntime().emitUpdateClause(
        *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
    return;
  }
}
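// For reference, the three mutually exclusive forms handled above are:
//   omp_depend_t obj;
//   #pragma omp depobj(obj) depend(inout : x)   // initialize
//   #pragma omp depobj(obj) update(in)          // change dependence type
//   #pragma omp depobj(obj) destroy             // release the object
// Exactly one of the depend/destroy/update clauses is present on a valid
// 'depobj' directive, hence the early returns.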
void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
  if (!OMPParentLoopDirectiveForScan)
    return;
  const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
  bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
    if (C->getModifier() != OMPC_REDUCTION_inscan)
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  if (ParentDir.getDirectiveKind() == OMPD_simd ||
      (getLangOpts().OpenMPSimd &&
       isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
    // For the simd directive and simd-based directives in simd-only mode, use
    // the following codegen:
    // int x = 0;
    // #pragma omp simd reduction(inscan, +: x)
    // for (..) {
    //   <first part>
    //   #pragma omp scan inclusive(x)
    //   <second part>
    // }
    // is transformed to:
    // int x = 0;
    // for (..) {
    //   int x_priv = 0;
    //   <first part>
    //   x = x_priv + x;
    //   x_priv = x;
    //   <second part>
    // }
    // and
    // int x = 0;
    // #pragma omp simd reduction(inscan, +: x)
    // for (..) {
    //   <first part>
    //   #pragma omp scan exclusive(x)
    //   <second part>
    // }
    // to
    // int x = 0;
    // for (..) {
    //   int x_priv = 0;
    //   <second part>
    //   int temp = x;
    //   x = x_priv + x;
    //   x_priv = temp;
    //   <first part>
    // }
    llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
    EmitBranch(IsInclusive
                   ? OMPScanReduce
                   : BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(OMPScanDispatch);
    {
      // New scope for correct construction/destruction of temp variables for
      // exclusive scan.
      LexicalScope Scope(*this, S.getSourceRange());
      EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
      EmitBlock(OMPScanReduce);
      if (!IsInclusive) {
        // Create a temp var and copy the LHS value to this temp.
        // TMP = LHS;
        for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
          const Expr *PrivateExpr = Privates[I];
          const Expr *TempExpr = CopyArrayTemps[I];
          EmitAutoVarDecl(
              *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
          LValue DestLVal = EmitLValue(TempExpr);
          LValue SrcLVal = EmitLValue(LHSs[I]);
          EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                      SrcLVal.getAddress(*this),
                      cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                      cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                      CopyOps[I]);
        }
      }
      CGM.getOpenMPRuntime().emitReduction(
          *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
      for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
        const Expr *PrivateExpr = Privates[I];
        LValue DestLVal;
        LValue SrcLVal;
        if (IsInclusive) {
          DestLVal = EmitLValue(RHSs[I]);
          SrcLVal = EmitLValue(LHSs[I]);
        } else {
          const Expr *TempExpr = CopyArrayTemps[I];
          DestLVal = EmitLValue(RHSs[I]);
          SrcLVal = EmitLValue(TempExpr);
        }
        EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                    SrcLVal.getAddress(*this),
                    cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                    cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                    CopyOps[I]);
      }
    }
    EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
    OMPScanExitBlock = IsInclusive
                           ? BreakContinueStack.back().ContinueBlock.getBlock()
                           : OMPScanReduce;
    EmitBlock(OMPAfterScanBlock);
    return;
  }
  if (!IsInclusive) {
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(OMPScanExitBlock);
  }
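  // For worksharing-based parents the inscan reduction is instead implemented
  // with two passes over the loop and a per-iteration copy buffer,
  // conceptually:
  //   pass 1 (input phase):  buffer[i] = x_priv;   // emitted just below
  //   <combine step over the buffer, emitted as part of the enclosing
  //    worksharing directive's codegen>
  //   pass 2 (scan phase):   x_priv = buffer[i];   // or buffer[i-1] for an
  //                                                // exclusive scan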
  if (OMPFirstScanLoop) {
    // Emit buffer[i] = red; at the end of the input phase.
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue DestLVal = EmitLValue(CopyArrayElem);
      LValue SrcLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                  SrcLVal.getAddress(*this),
                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                  CopyOps[I]);
    }
  }
  EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  if (IsInclusive) {
    EmitBlock(OMPScanExitBlock);
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  }
  EmitBlock(OMPScanDispatch);
  if (!OMPFirstScanLoop) {
    // Emit red = buffer[i]; at the entrance to the scan phase.
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    llvm::BasicBlock *ExclusiveExitBB = nullptr;
    if (!IsInclusive) {
      llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
      ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
      llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
      Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
      EmitBlock(ContBB);
      // Use the (idx - 1) iteration for exclusive scan.
      IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
    }
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue SrcLVal = EmitLValue(CopyArrayElem);
      LValue DestLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                  SrcLVal.getAddress(*this),
                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                  CopyOps[I]);
    }
    if (!IsInclusive) {
      EmitBlock(ExclusiveExitBB);
    }
  }
  EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
                                               : OMPAfterScanBlock);
  EmitBlock(OMPAfterScanBlock);
}
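// For illustration, a bare
//   #pragma omp distribute
//   for (int i = 0; i < n; ++i) ...
// divides the iteration space among the teams of a league (one chunk per
// team for the default static, unchunked schedule); the helper below emits
// the shared loop skeleton that all distribute-based directives build on.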
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iteration count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    // Emit 'then' code.
    {
      // Emit helper vars inits.

      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind()))
        EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const Expr *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getBeginLoc());
        }
      } else {
        // Default behaviour for the dist_schedule clause.
        CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
            *this, S, ScheduleKind, Chunk);
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      bool StaticChunked =
          RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr) ||
          StaticChunked) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            StaticChunked ? Chunk : nullptr);
        RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                    StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        const Expr *Cond =
            isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                ? S.getCombinedCond()
                : S.getCond();

        if (StaticChunked)
          Cond = S.getCombinedDistCond();

        // For static unchunked schedules generate:
        //
        //  1. For distribute alone, codegen
        //    while (idx <= UB) {
        //      BODY;
        //      ++idx;
        //    }
        //
        //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
        //    while (idx <= UB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      idx += ST;
        //    }
        //
        // For static chunked schedules generate:
        //
        //   while (IV <= GlobalUB) {
        //     <CodeGen rest of pragma>(LB, UB);
        //     LB += ST;
        //     UB += ST;
        //     UB = min(UB, GlobalUB);
        //     IV = LB;
        //   }
        //
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind()))
                CGF.EmitOMPSimdInit(S);
            },
            [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
             StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(), Cond, IncExpr,
                  [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                    CodeGenLoop(CGF, S, LoopExit);
                  },
                  [&S, StaticChunked](CodeGenFunction &CGF) {
                    if (StaticChunked) {
                      CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
                      CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedInit());
                    }
                  });
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from the
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            IL.getAddress(*this), Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind())) {
        EmitOMPReductionClauseFinal(S, OMPD_simd);
        // Emit post-update of the reduction variables if IsLastIter != 0.
        emitPostUpdateForReductionClause(
            *this, S, [IL, &S](CodeGenFunction &CGF) {
              return CGF.Builder.CreateIsNotNull(
                  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
            });
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}

void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S,
                                                   SourceLocation Loc) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
  Fn->setDoesNotRecurse();
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "No associated statement must be in ordered depend construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      llvm::Function *OutlinedFn =
          emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
}

static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                                   DestType, Loc)
                        : CGF.EmitComplexToScalarConversion(
                              Val.getComplexVal(), SrcType, DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    llvm::Value *ScalarVal = CGF.EmitScalarConversion(
        Val.getScalarVal(), SrcType, DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg())
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  else
    CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
}

static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, LValue LVal,
                                   SourceLocation Loc) {
  if (LVal.isGlobalReg())
    return CGF.EmitLoadOfLValue(LVal, Loc);
  return CGF.EmitAtomicLoad(
      LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
      LVal.isVolatile());
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}
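// For reference, the read handler below lowers, e.g.,
//   #pragma omp atomic read acquire
//   v = x;
// to an atomic load of 'x' with the requested ordering, an acquire flush
// when the ordering demands one, and a plain store of the (possibly
// converted) value into 'v'.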

static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
  // OpenMP, 2.17.7, atomic Construct
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::Monotonic:
  case llvm::AtomicOrdering::Release:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
}

static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, const Expr *X,
                                   const Expr *E, SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomics are supported for the given
  // type on the target platform.
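  // For example (illustrative only), with an integer 'x':
  //   #pragma omp atomic
  //   x += n;       // -> atomicrmw add
  //   x = n - x;    // -> not representable as atomicrmw ('x' is on the RHS
  //                 //    of a non-commutative operator), so the caller falls
  //                 //    back to the compare-and-swap path.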
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress(CGF).getElementType())) ||
      !X.getAddress(CGF).getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress(CGF).getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  llvm::Value *Res =
      CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform the compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
                                    llvm::AtomicOrdering AO, const Expr *X,
                                    const Expr *E, const Expr *UE,
                                    bool IsXLHSInRHSPart, SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
                                     llvm::AtomicOrdering AO,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval binop expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; -> expr binop xrval;
    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // An 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use the old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide the new value, so evaluate it using the
        // old value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::AcquireRelease);
    break;
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              llvm::AtomicOrdering AO, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
    break;
  case OMPC_write:
    emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_sizes:
  case OMPC_full:
  case OMPC_partial:
  case OMPC_allocator:
  case OMPC_allocate:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_acq_rel:
  case OMPC_acquire:
  case OMPC_release:
  case OMPC_relaxed:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_depobj:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_use_device_addr:
  case OMPC_is_device_ptr:
  case OMPC_unified_address:
  case OMPC_unified_shared_memory:
  case OMPC_reverse_offload:
  case OMPC_dynamic_allocators:
  case OMPC_atomic_default_mem_order:
  case OMPC_device_type:
  case OMPC_match:
  case OMPC_nontemporal:
  case OMPC_order:
  case OMPC_destroy:
  case OMPC_detach:
  case OMPC_inclusive:
  case OMPC_exclusive:
  case OMPC_uses_allocators:
  case OMPC_affinity:
  case OMPC_init:
  case OMPC_inbranch:
  case OMPC_notinbranch:
  case OMPC_link:
  case OMPC_use:
  case OMPC_novariants:
  case OMPC_nocontext:
  case OMPC_filter:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
  bool MemOrderingSpecified = false;
  if (S.getSingleClause<OMPSeqCstClause>()) {
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcqRelClause>()) {
    AO = llvm::AtomicOrdering::AcquireRelease;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcquireClause>()) {
    AO = llvm::AtomicOrdering::Acquire;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPReleaseClause>()) {
    AO = llvm::AtomicOrdering::Release;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPRelaxedClause>()) {
    AO = llvm::AtomicOrdering::Monotonic;
    MemOrderingSpecified = true;
  }
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find the first clause that is not a memory-order clause
    // (seq_cst|acq_rel|acquire|release|relaxed) and not a hint clause.
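    // For example, for '#pragma omp atomic capture seq_cst' this loop
    // selects OMPC_capture as the kind of the atomic operation.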
    if (C->getClauseKind() != OMPC_seq_cst &&
        C->getClauseKind() != OMPC_acq_rel &&
        C->getClauseKind() != OMPC_acquire &&
        C->getClauseKind() != OMPC_release &&
        C->getClauseKind() != OMPC_relaxed &&
        C->getClauseKind() != OMPC_hint) {
      Kind = C->getClauseKind();
      break;
    }
  }
  if (!MemOrderingSpecified) {
    llvm::AtomicOrdering DefaultOrder =
        CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
    if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
        DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
        (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
         Kind == OMPC_capture)) {
      AO = DefaultOrder;
    } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
      if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
        AO = llvm::AtomicOrdering::Release;
      } else if (Kind == OMPC_read) {
        assert(Kind == OMPC_read && "Unexpected atomic kind.");
        AO = llvm::AtomicOrdering::Acquire;
      }
    }
  }

  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S.getAssociatedStmt());
  emitOMPAtomicExpr(*this, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
                    S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(),
                    S.getBeginLoc());
}

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // On device emit this construct as inlined code.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
        });
    return;
  }

  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the (at most one) 'if' clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
      nullptr, OMPC_DEVICE_unknown);
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device.setPointerAndInt(C->getDevice(), C->getModifier());

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
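  // (For instance, for a target region inside a constructor the name of the
  // complete-object constructor is used, so host and device compilations
  // derive the same kernel names.)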
  if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  auto &&SizeEmitter =
      [IsOffloadEntry](CodeGenFunction &CGF,
                       const OMPLoopDirective &D) -> llvm::Value * {
    if (IsOffloadEntry) {
      OMPLoopScope(CGF, D);
      // Emit calculation of the iterations count.
      llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
      NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
                                                /*isSigned=*/false);
      return NumIterations;
    }
    return nullptr;
  };
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        SizeEmitter);
}

static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
  CGF.EnsureInsertPoint();
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
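    // (If only one of the two clauses is present, a null expression is
    // forwarded for the other, which the runtime treats as "no constraint".)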

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsRegion(CodeGenFunction &CGF,
                                  PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
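  // (The 'distribute' loop itself is emitted as an inlined directive inside
  // the outlined teams region, via CodeGenDistribute above.)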
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {

  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
                              CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  // Emit SPMD target teams distribute parallel for region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  // Emit SPMD target teams distribute parallel for simd region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // TODO: This check is necessary as we only generate `omp parallel` through
    // the OpenMPIRBuilder for now.
    if (S.getCancelRegion() == OMPD_parallel ||
        S.getCancelRegion() == OMPD_sections ||
        S.getCancelRegion() == OMPD_section) {
      llvm::Value *IfCondition = nullptr;
      if (IfCond)
        IfCondition = EmitScalarExpr(IfCond,
                                     /*IgnoreResultAssign=*/true);
      return Builder.restoreIP(
          OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
    }
  }

  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
      Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}

void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (const Expr *PvtVarIt : C.private_copies()) {
    const auto *OrigVD =
        cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
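    // For example, when a member field is named in 'use_device_ptr(...)'
    // inside a member function, the captured expression is unwrapped back to
    // the underlying member declaration below so it matches the mapping table.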
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privatize fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(
        OrigVD, [this, OrigVD, InitAddrIt, InitVD, PvtVD]() {
          // Initialize the temporary initialization variable with the address
          // we get from the runtime library. We have to cast the source
          // address because it is always a void *. References are materialized
          // in the privatization scope, so the initialization here disregards
          // the fact the original variable is a reference.
          QualType AddrQTy = getContext().getPointerType(
              OrigVD->getType().getNonReferenceType());
          llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
          Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
          setAddrOfLocalVar(InitVD, InitAddr);

          // Emit the private declaration; it will be initialized by the
          // initialization variable we just added to the local declarations
          // map.
          EmitDecl(*PvtVD);

          // The initialization variable has served its purpose in the
          // emission of the previous declaration, so we don't need it anymore.
          LocalDeclMap.erase(InitVD);

          // Return the address of the private variable.
          return GetAddrOfLocalVar(PvtVD);
        });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}

static const VarDecl *getBaseDecl(const Expr *Ref) {
  const Expr *Base = Ref->IgnoreParenImpCasts();
  while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
    Base = OASE->getBase()->IgnoreParenImpCasts();
  while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
    Base = ASE->getBase()->IgnoreParenImpCasts();
  return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
}

void CodeGenFunction::EmitOMPUseDeviceAddrClause(
    const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const Expr *Ref : C.varlists()) {
    const VarDecl *OrigVD = getBaseDecl(Ref);
    if (!Processed.insert(OrigVD).second)
      continue;
    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privatize fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    Address PrivAddr = InitAddrIt->getSecond();
    // For declrefs and variable-length arrays we need to load the pointer for
    // correct mapping, since the pointer to the data was passed to the
    // runtime.
    if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
        MatchingVD->getType()->isArrayType())
      PrivAddr =
          EmitLoadOfPointer(PrivAddr, getContext()
                                          .getPointerType(OrigVD->getType())
                                          ->castAs<PointerType>());
    llvm::Type *RealTy =
        ConvertTypeForMem(OrigVD->getType().getNonReferenceType())
            ->getPointerTo();
    PrivAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy);

    (void)PrivateScope.addPrivate(OrigVD, [PrivAddr]() { return PrivAddr; });
  }
}

// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
                                       /*SeparateBeginEndCalls=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code
  // generation to deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers
      // if needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
          CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
                                         Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        OMPLexicalScope Scope(CGF, S, OMPD_unknown);
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
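    // (Forwarding means that when the runtime invokes the region,
    // DevicePointerPrivActionTy::Enter runs inside PrivCodeGen and flips
    // PrivatizeDevicePointers before the branch above is evaluated.)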
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inline scope: changes to the references inside the region
    // are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond,
                                                      Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond,
                                                      Device);
}

static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' with a nested 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' with a nested 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

/// Map a helper variable to the address of the given implicit parameter.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  const auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(VDecl,
                      [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
}

void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  Address CapturedStruct = Address::invalid();
  {
    OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
    CapturedStruct = GenerateCapturedStmtArgument(*CS);
  }
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
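  // (A tied task, the default here, must be resumed by the thread that first
  // executed it; 'untied' tasks may migrate between threads.)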
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    (void)CGF.EmitOMPLinearClauseInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    {
      OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
      emitCommonSimdLoop(
          CGF, S,
          [&S](CodeGenFunction &CGF, PrePostActionTy &) {
            if (isOpenMPSimdDirective(S.getDirectiveKind()))
              CGF.EmitOMPSimdInit(S);
          },
          [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
            CGF.EmitOMPInnerLoop(
                S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                [&S](CodeGenFunction &CGF) {
                  emitOMPLoopBodyWithStopPoint(CGF, S,
                                               CodeGenFunction::JumpDest());
                },
                [](CodeGenFunction &) {});
          });
    }
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getBeginLoc())));
    }
    CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
                               (*LIP)->getType(), S.getBeginLoc()));
    });
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
                      &Data](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getBeginLoc());
  }
}

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
    const OMPMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
    const OMPMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
    const OMPParallelMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
    const OMPParallelMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond,
                                                      Device);
}

void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
    EmitOMPScanDirective(*SD);
    return;
  }
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    OMPPrivateScope GlobalsScope(CGF);
    if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
      // Capture global firstprivates to avoid crash.
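      // (Global variables have no entry in LocalDeclMap, so their addresses
      // are registered in the privatization scope here to keep later lookups
      // in the outlined task body from failing.)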
      for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
        for (const Expr *Ref : C->varlists()) {
          const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
          if (!DRE)
            continue;
          const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
          if (!VD || VD->hasLocalStorage())
            continue;
          if (!CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(Ref);
            GlobalsScope.addPrivate(
                VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
          }
        }
      }
    }
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      (void)GlobalsScope.Privatize();
      ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const Expr *E : LD->counters()) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(E);
            GlobalsScope.addPrivate(
                VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
          }
          if (isa<OMPCapturedExprDecl>(VD)) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
          if (!C->getNumForLoops())
            continue;
          for (unsigned I = LD->getLoopsNumber(),
                        E = C->getLoopNumIterations().size();
               I < E; ++I) {
            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
              // Emit only those that were not explicitly referenced in
              // clauses.
              if (!CGF.LocalDeclMap.count(VD))
                CGF.EmitVarDecl(*VD);
            }
          }
        }
      }
      (void)GlobalsScope.Privatize();
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  if (D.getDirectiveKind() == OMPD_atomic ||
      D.getDirectiveKind() == OMPD_critical ||
      D.getDirectiveKind() == OMPD_section ||
      D.getDirectiveKind() == OMPD_master ||
      D.getDirectiveKind() == OMPD_masked) {
    EmitStmt(D.getAssociatedStmt());
  } else {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
    OMPSimdLexicalScope Scope(*this, D);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        *this,
        isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                    : D.getDirectiveKind(),
        CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, D);
}