//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/AtomicOrdering.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

static const VarDecl *getBaseDecl(const Expr *Ref);

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
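          // Resolve the captured variable to the address of the original
          // shared variable so the inlined region reuses it directly instead
          // of creating a private copy.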
          return CGF.EmitLValue(&DRE).getAddress(CGF);
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for OpenMP parallel construct that handles correct codegen
/// for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of used expressions from the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const DeclStmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(
                                OrigVD->getType().getNonReferenceType()))),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
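      // Walk the whole associated loop nest, including imperfectly nested
      // loops; the callback always returns false so that doForAllLoops visits
      // all LD->getLoopsNumber() loops.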
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they are
    // not used in simd only mode.
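    // Record the decls of those temporaries so the capture walk below can
    // skip them.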
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress(CGF);
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
                  : VlaSize.NumElts;
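      // Walk nested VLA dimensions, multiplying their element counts with
      // no-unsigned-wrap arithmetic until a non-VLA element type is reached.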
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress(CGF);
  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
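/// The options distinguish the runtime-facing variant, where by-value
/// captures are passed as uintptr_t, from the debug variant that keeps the
/// original parameter types.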
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress(CGF);
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress(CGF)}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
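  // When debug info is required, first emit a "_debug__" variant of the
  // outlined function that keeps the original parameter types, then emit a
  // thin wrapper with the uintptr-based signature the runtime expects, which
  // simply forwards its arguments to the debug variant.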
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
        return LocalAddrPair.second.second;
      });
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            LV.getAddress(WrapperCGF),
            PI->getType()->getPointerTo(
                LV.getAddress(WrapperCGF).getAddressSpace())));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
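  // emitArrayLength drills DestAddr down to the first element and returns the
  // flattened element count used by the copy loop below; ElementTy receives
  // the base element type.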
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
            Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function (e.g. omp for, omp simd, omp distribute).
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copy for firstprivate constant variables in target
      // regions, captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this,
                                                                    OrigVD);
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
                const Expr *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform simple memcpy.
                  LValue Dest =
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
                } else {
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(),
                      OriginalLVal.getAddress(*this), Type,
                      [this, VDInit, Init](Address DestElement,
                                           Address SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for single element.
                        setAddrOfLocalVar(VDInit, SrcElement);
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer*/ false);
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
                                               ThisFirstprivateIsLastprivate,
                                               OrigVD, &Lastprivates, IRef]() {
                // Emit private VarDecl with copy init.
                // Remap temp VDInit variable to the address of the original
                // variable (for proper handling of captured global variables).
                setAddrOfLocalVar(VDInit, OriginalAddr);
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                if (ThisFirstprivateIsLastprivate &&
                    Lastprivates[OrigVD->getCanonicalDecl()] ==
                        OMPC_LASTPRIVATE_conditional) {
                  // Create/init special variable for lastprivate conditionals.
                  Address VDAddr =
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                          *this, OrigVD);
                  llvm::Value *V = EmitLoadOfScalar(
                      MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
                                     AlignmentSource::Decl),
                      (*IRef)->getExprLoc());
                  EmitStoreOfScalar(V,
                                    MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                   AlignmentSource::Decl));
                  LocalDeclMap.erase(VD);
                  setAddrOfLocalVar(VD, VDAddr);
                  return VDAddr;
                }
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        if (CopiedVars.size() == 1) {
          // First, check if the current thread is the master thread. If it is,
          // there is no need to copy the data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
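          // On the master thread the threadprivate copy aliases the original
          // variable, so equal addresses mean the copy region can be skipped.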
          auto *MasterAddrInt =
              Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
          auto *PrivateAddrInt =
              Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                          /*RefersToEnclosingVariableOrCapture=*/
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress(*this);
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C,
                                                               OrigVD]() {
            if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
              Address VDAddr =
                  CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this,
                                                                        OrigVD);
              setAddrOfLocalVar(VD, VDAddr);
              return VDAddr;
            }
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit an implicit barrier if at least one lastprivate conditional is
    // found and this is not simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
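        // DestVD was remapped to the original variable's address in
        // EmitOMPLastprivateClauseInit, so GetAddrOfLocalVar yields the
        // original storage here.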
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
      Data.ReductionOps.append(C->reduction_ops().begin(),
                               C->reduction_ops().end());
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    }
  }
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
        return IsArray ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
      });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
  if (!Data.ReductionVars.empty()) {
    Data.IsReductionWithTaskMod = true;
    Data.IsWorksharingReduction =
        isOpenMPWorksharingDirective(D.getDirectiveKind());
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
    const Expr *TaskRedRef = nullptr;
    switch (D.getDirectiveKind()) {
    case OMPD_parallel:
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_for:
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_sections:
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for:
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_master:
      TaskRedRef =
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_sections:
      TaskRedRef =
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel:
      TaskRedRef =
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel_for:
      TaskRedRef =
          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_distribute_parallel_for:
      TaskRedRef =
          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_target_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_simd:
    case OMPD_for_simd:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_parallel_for_simd:
    case OMPD_task:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_ordered:
    case OMPD_atomic:
    case OMPD_teams:
    case OMPD_target:
    case OMPD_cancellation_point:
    case OMPD_cancel:
    case OMPD_target_data:
    case OMPD_target_enter_data:
    case OMPD_target_exit_data:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_distribute:
    case OMPD_target_update:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_distribute_simd:
    case OMPD_target_parallel_for_simd:
    case OMPD_target_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams_distribute_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_declare_simd:
    case OMPD_requires:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive with task reductions.");
    }

    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
    EmitVarDecl(*VD);
    EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
                      /*Volatile=*/false, TaskRedRef->getType());
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  bool IsReductionWithTaskMod = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Do not emit for inscan reductions.
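    // They are finalized separately as part of the scan directive codegen.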
    if (C->getModifier() == OMPC_REDUCTION_inscan)
      continue;
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    IsReductionWithTaskMod =
        IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
  }
  if (HasAtLeastOneReduction) {
    if (IsReductionWithTaskMod) {
      CGM.getOpenMPRuntime().emitTaskReductionFini(
          *this, D.getBeginLoc(),
          isOpenMPWorksharingDirective(D.getDirectiveKind()));
    }
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit nowait reduction if the nowait clause is present or the directive
    // is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
This is necessary for combined constructs such as 1485 /// 'distribute parallel for' 1486 typedef llvm::function_ref<void(CodeGenFunction &, 1487 const OMPExecutableDirective &, 1488 llvm::SmallVectorImpl<llvm::Value *> &)> 1489 CodeGenBoundParametersTy; 1490 } // anonymous namespace 1491 1492 static void 1493 checkForLastprivateConditionalUpdate(CodeGenFunction &CGF, 1494 const OMPExecutableDirective &S) { 1495 if (CGF.getLangOpts().OpenMP < 50) 1496 return; 1497 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls; 1498 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 1499 for (const Expr *Ref : C->varlists()) { 1500 if (!Ref->getType()->isScalarType()) 1501 continue; 1502 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 1503 if (!DRE) 1504 continue; 1505 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); 1506 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); 1507 } 1508 } 1509 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 1510 for (const Expr *Ref : C->varlists()) { 1511 if (!Ref->getType()->isScalarType()) 1512 continue; 1513 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 1514 if (!DRE) 1515 continue; 1516 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); 1517 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); 1518 } 1519 } 1520 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 1521 for (const Expr *Ref : C->varlists()) { 1522 if (!Ref->getType()->isScalarType()) 1523 continue; 1524 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 1525 if (!DRE) 1526 continue; 1527 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); 1528 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); 1529 } 1530 } 1531 // Privates should not be analyzed since they are not captured at all. 1532 // Task reductions may be skipped - tasks are ignored. 1533 // Firstprivates do not return a value but may be passed by reference - no 1534 // need to check for updated lastprivate conditional.
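// (Example, as a sketch: for '#pragma omp parallel for
// lastprivate(conditional: x)' in OpenMP 5.0, the original 'x' receives the
// value from the last iteration in which 'x' was actually assigned; the loops
// above register the privatized decls such a check must take into account.)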
1535 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 1536 for (const Expr *Ref : C->varlists()) { 1537 if (!Ref->getType()->isScalarType()) 1538 continue; 1539 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 1540 if (!DRE) 1541 continue; 1542 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); 1543 } 1544 } 1545 CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional( 1546 CGF, S, PrivateDecls); 1547 } 1548 1549 static void emitCommonOMPParallelDirective( 1550 CodeGenFunction &CGF, const OMPExecutableDirective &S, 1551 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1552 const CodeGenBoundParametersTy &CodeGenBoundParameters) { 1553 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); 1554 llvm::Function *OutlinedFn = 1555 CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( 1556 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 1557 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { 1558 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 1559 llvm::Value *NumThreads = 1560 CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 1561 /*IgnoreResultAssign=*/true); 1562 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( 1563 CGF, NumThreads, NumThreadsClause->getBeginLoc()); 1564 } 1565 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { 1566 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); 1567 CGF.CGM.getOpenMPRuntime().emitProcBindClause( 1568 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc()); 1569 } 1570 const Expr *IfCond = nullptr; 1571 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 1572 if (C->getNameModifier() == OMPD_unknown || 1573 C->getNameModifier() == OMPD_parallel) { 1574 IfCond = C->getCondition(); 1575 break; 1576 } 1577 } 1578 1579 OMPParallelScope Scope(CGF, S); 1580 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 1581 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk 1582 // lower and upper bounds with the pragma 'for' chunking mechanism. 1583 // The following lambda takes care of appending the lower and upper bound 1584 // parameters when necessary 1585 CodeGenBoundParameters(CGF, S, CapturedVars); 1586 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 1587 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn, 1588 CapturedVars, IfCond); 1589 } 1590 1591 static bool isAllocatableDecl(const VarDecl *VD) { 1592 const VarDecl *CVD = VD->getCanonicalDecl(); 1593 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 1594 return false; 1595 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 1596 // Use the default allocation. 
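// (E.g., 'int v;' with '#pragma omp allocate(v) allocator(omp_pteam_mem_alloc)'
// is allocatable here, whereas a plain '#pragma omp allocate(v)' that names no
// allocator expression falls back to the default allocation and is not.)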
1597 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 1598 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 1599 !AA->getAllocator()); 1600 } 1601 1602 static void emitEmptyBoundParameters(CodeGenFunction &, 1603 const OMPExecutableDirective &, 1604 llvm::SmallVectorImpl<llvm::Value *> &) {} 1605 1606 Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable( 1607 CodeGenFunction &CGF, const VarDecl *VD) { 1608 CodeGenModule &CGM = CGF.CGM; 1609 auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1610 1611 if (!VD) 1612 return Address::invalid(); 1613 const VarDecl *CVD = VD->getCanonicalDecl(); 1614 if (!isAllocatableDecl(CVD)) 1615 return Address::invalid(); 1616 llvm::Value *Size; 1617 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 1618 if (CVD->getType()->isVariablyModifiedType()) { 1619 Size = CGF.getTypeSize(CVD->getType()); 1620 // Align the size: ((size + align - 1) / align) * align 1621 Size = CGF.Builder.CreateNUWAdd( 1622 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 1623 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 1624 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 1625 } else { 1626 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 1627 Size = CGM.getSize(Sz.alignTo(Align)); 1628 } 1629 1630 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 1631 assert(AA->getAllocator() && 1632 "Expected allocator expression for non-default allocator."); 1633 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 1634 // According to the standard, the original allocator type is an enum (integer). 1635 // Convert to pointer type, if required. 1636 if (Allocator->getType()->isIntegerTy()) 1637 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 1638 else if (Allocator->getType()->isPointerTy()) 1639 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 1640 CGM.VoidPtrTy); 1641 1642 llvm::Value *Addr = OMPBuilder.createOMPAlloc( 1643 CGF.Builder, Size, Allocator, 1644 getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", ".")); 1645 llvm::CallInst *FreeCI = 1646 OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator); 1647 1648 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI); 1649 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1650 Addr, 1651 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 1652 getNameWithSeparators({CVD->getName(), ".addr"}, ".", ".")); 1653 return Address(Addr, Align); 1654 } 1655 1656 Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate( 1657 CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, 1658 SourceLocation Loc) { 1659 CodeGenModule &CGM = CGF.CGM; 1660 if (CGM.getLangOpts().OpenMPUseTLS && 1661 CGM.getContext().getTargetInfo().isTLSSupported()) 1662 return VDAddr; 1663 1664 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1665 1666 llvm::Type *VarTy = VDAddr.getElementType(); 1667 llvm::Value *Data = 1668 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy); 1669 llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)); 1670 std::string Suffix = getNameWithSeparators({"cache", ""}); 1671 llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix); 1672 1673 llvm::CallInst *ThreadPrivateCacheCall = 1674 OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName); 1675 1676 return Address(ThreadPrivateCacheCall, VDAddr.getAlignment()); 1677
} 1678 1679 std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators( 1680 ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) { 1681 SmallString<128> Buffer; 1682 llvm::raw_svector_ostream OS(Buffer); 1683 StringRef Sep = FirstSeparator; 1684 for (StringRef Part : Parts) { 1685 OS << Sep << Part; 1686 Sep = Separator; 1687 } 1688 return OS.str().str(); 1689 } 1690 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { 1691 if (CGM.getLangOpts().OpenMPIRBuilder) { 1692 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1693 // Check if we have any if clause associated with the directive. 1694 llvm::Value *IfCond = nullptr; 1695 if (const auto *C = S.getSingleClause<OMPIfClause>()) 1696 IfCond = EmitScalarExpr(C->getCondition(), 1697 /*IgnoreResultAssign=*/true); 1698 1699 llvm::Value *NumThreads = nullptr; 1700 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) 1701 NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(), 1702 /*IgnoreResultAssign=*/true); 1703 1704 ProcBindKind ProcBind = OMP_PROC_BIND_default; 1705 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) 1706 ProcBind = ProcBindClause->getProcBindKind(); 1707 1708 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 1709 1710 // The cleanup callback that finalizes all variables at the given location, 1711 // thus calling destructors etc. 1712 auto FiniCB = [this](InsertPointTy IP) { 1713 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 1714 }; 1715 1716 // Privatization callback that performs appropriate action for 1717 // shared/private/firstprivate/lastprivate/copyin/... variables. 1718 // 1719 // TODO: This defaults to shared right now. 1720 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 1721 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { 1722 // The next line is appropriate only for variables (Val) with the 1723 // data-sharing attribute "shared". 1724 ReplVal = &Val; 1725 1726 return CodeGenIP; 1727 }; 1728 1729 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); 1730 const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt(); 1731 1732 auto BodyGenCB = [ParallelRegionBodyStmt, 1733 this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 1734 llvm::BasicBlock &ContinuationBB) { 1735 OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP, 1736 ContinuationBB); 1737 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt, 1738 CodeGenIP, ContinuationBB); 1739 }; 1740 1741 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); 1742 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); 1743 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( 1744 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); 1745 Builder.restoreIP( 1746 OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB, 1747 IfCond, NumThreads, ProcBind, S.hasCancel())); 1748 return; 1749 } 1750 1751 // Emit parallel region as a standalone region. 1752 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 1753 Action.Enter(CGF); 1754 OMPPrivateScope PrivateScope(CGF); 1755 bool Copyins = CGF.EmitOMPCopyinClause(S); 1756 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 1757 if (Copyins) { 1758 // Emit an implicit barrier to synchronize threads and avoid data races on 1759 // propagation of the master thread's values of threadprivate variables to 1760 // local instances of those variables in all other implicit threads.
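// (E.g., '#pragma omp parallel copyin(tp)' for a threadprivate 'tp' takes
// this path: the barrier keeps worker threads from reading 'tp' before the
// master thread's value has been copied in.)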
1761 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 1762 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 1763 /*ForceSimpleCall=*/true); 1764 } 1765 CGF.EmitOMPPrivateClause(S, PrivateScope); 1766 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 1767 (void)PrivateScope.Privatize(); 1768 CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt()); 1769 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 1770 }; 1771 { 1772 auto LPCRegion = 1773 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 1774 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, 1775 emitEmptyBoundParameters); 1776 emitPostUpdateForReductionClause(*this, S, 1777 [](CodeGenFunction &) { return nullptr; }); 1778 } 1779 // Check for outer lastprivate conditional update. 1780 checkForLastprivateConditionalUpdate(*this, S); 1781 } 1782 1783 namespace { 1784 /// RAII to handle scopes for loop transformation directives. 1785 class OMPTransformDirectiveScopeRAII { 1786 OMPLoopScope *Scope = nullptr; 1787 CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr; 1788 CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr; 1789 1790 public: 1791 OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) { 1792 if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) { 1793 Scope = new OMPLoopScope(CGF, *Dir); 1794 CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP); 1795 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI); 1796 } 1797 } 1798 ~OMPTransformDirectiveScopeRAII() { 1799 if (!Scope) 1800 return; 1801 delete CapInfoRAII; 1802 delete CGSI; 1803 delete Scope; 1804 } 1805 }; 1806 } // namespace 1807 1808 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, 1809 int MaxLevel, int Level = 0) { 1810 assert(Level < MaxLevel && "Too deep lookup during loop body codegen."); 1811 const Stmt *SimplifiedS = S->IgnoreContainers(); 1812 if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) { 1813 PrettyStackTraceLoc CrashInfo( 1814 CGF.getContext().getSourceManager(), CS->getLBracLoc(), 1815 "LLVM IR generation of compound statement ('{}')"); 1816 1817 // Keep track of the current cleanup stack depth, including debug scopes. 
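// (E.g., for '#pragma omp for collapse(2)' written as
// '{ for (...) { for (...) ... } }', the recursion below looks through the
// braces for the next loop while keeping cleanup scoping intact.)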
1818 CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange()); 1819 for (const Stmt *CurStmt : CS->body()) 1820 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level); 1821 return; 1822 } 1823 if (SimplifiedS == NextLoop) { 1824 if (auto *Dir = dyn_cast<OMPTileDirective>(SimplifiedS)) 1825 SimplifiedS = Dir->getTransformedStmt(); 1826 if (auto *Dir = dyn_cast<OMPUnrollDirective>(SimplifiedS)) 1827 SimplifiedS = Dir->getTransformedStmt(); 1828 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS)) 1829 SimplifiedS = CanonLoop->getLoopStmt(); 1830 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) { 1831 S = For->getBody(); 1832 } else { 1833 assert(isa<CXXForRangeStmt>(SimplifiedS) && 1834 "Expected canonical for loop or range-based for loop."); 1835 const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS); 1836 CGF.EmitStmt(CXXFor->getLoopVarStmt()); 1837 S = CXXFor->getBody(); 1838 } 1839 if (Level + 1 < MaxLevel) { 1840 NextLoop = OMPLoopDirective::tryToFindNextInnerLoop( 1841 S, /*TryImperfectlyNestedLoops=*/true); 1842 emitBody(CGF, S, NextLoop, MaxLevel, Level + 1); 1843 return; 1844 } 1845 } 1846 CGF.EmitStmt(S); 1847 } 1848 1849 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, 1850 JumpDest LoopExit) { 1851 RunCleanupsScope BodyScope(*this); 1852 // Update counters values on current iteration. 1853 for (const Expr *UE : D.updates()) 1854 EmitIgnoredExpr(UE); 1855 // Update the linear variables. 1856 // In distribute directives only loop counters may be marked as linear, no 1857 // need to generate the code for them. 1858 if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { 1859 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1860 for (const Expr *UE : C->updates()) 1861 EmitIgnoredExpr(UE); 1862 } 1863 } 1864 1865 // On a continue in the body, jump to the end. 1866 JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue"); 1867 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1868 for (const Expr *E : D.finals_conditions()) { 1869 if (!E) 1870 continue; 1871 // Check that loop counter in non-rectangular nest fits into the iteration 1872 // space. 1873 llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next"); 1874 EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(), 1875 getProfileCount(D.getBody())); 1876 EmitBlock(NextBB); 1877 } 1878 1879 OMPPrivateScope InscanScope(*this); 1880 EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true); 1881 bool IsInscanRegion = InscanScope.Privatize(); 1882 if (IsInscanRegion) { 1883 // Need to remember the block before and after scan directive 1884 // to dispatch them correctly depending on the clause used in 1885 // this directive, inclusive or exclusive. For inclusive scan the natural 1886 // order of the blocks is used, for exclusive clause the blocks must be 1887 // executed in reverse order. 1888 OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb"); 1889 OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb"); 1890 // No need to allocate inscan exit block, in simd mode it is selected in the 1891 // codegen for the scan directive. 1892 if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd) 1893 OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb"); 1894 OMPScanDispatch = createBasicBlock("omp.inscan.dispatch"); 1895 EmitBranch(OMPScanDispatch); 1896 EmitBlock(OMPBeforeScanBlock); 1897 } 1898 1899 // Emit loop variables for C++ range loops. 
1900 const Stmt *Body = 1901 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); 1902 // Emit loop body. 1903 emitBody(*this, Body, 1904 OMPLoopBasedDirective::tryToFindNextInnerLoop( 1905 Body, /*TryImperfectlyNestedLoops=*/true), 1906 D.getLoopsNumber()); 1907 1908 // Jump to the dispatcher at the end of the loop body. 1909 if (IsInscanRegion) 1910 EmitBranch(OMPScanExitBlock); 1911 1912 // The end (updates/cleanups). 1913 EmitBlock(Continue.getBlock()); 1914 BreakContinueStack.pop_back(); 1915 } 1916 1917 using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>; 1918 1919 /// Emit a captured statement and return the function as well as its captured 1920 /// closure context. 1921 static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF, 1922 const CapturedStmt *S) { 1923 LValue CapStruct = ParentCGF.InitCapturedStruct(*S); 1924 CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true); 1925 std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI = 1926 std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S); 1927 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get()); 1928 llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S); 1929 1930 return {F, CapStruct.getPointer(ParentCGF)}; 1931 } 1932 1933 /// Emit a call to a previously captured closure. 1934 static llvm::CallInst * 1935 emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap, 1936 llvm::ArrayRef<llvm::Value *> Args) { 1937 // Append the closure context to the argument. 1938 SmallVector<llvm::Value *> EffectiveArgs; 1939 EffectiveArgs.reserve(Args.size() + 1); 1940 llvm::append_range(EffectiveArgs, Args); 1941 EffectiveArgs.push_back(Cap.second); 1942 1943 return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs); 1944 } 1945 1946 llvm::CanonicalLoopInfo * 1947 CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) { 1948 assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented"); 1949 1950 EmitStmt(S); 1951 assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops"); 1952 1953 // The last added loop is the outermost one. 1954 return OMPLoopNestStack.back(); 1955 } 1956 1957 void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) { 1958 const Stmt *SyntacticalLoop = S->getLoopStmt(); 1959 if (!getLangOpts().OpenMPIRBuilder) { 1960 // Ignore if OpenMPIRBuilder is not enabled. 1961 EmitStmt(SyntacticalLoop); 1962 return; 1963 } 1964 1965 LexicalScope ForScope(*this, S->getSourceRange()); 1966 1967 // Emit init statements. The Distance/LoopVar funcs may reference variable 1968 // declarations they contain. 1969 const Stmt *BodyStmt; 1970 if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) { 1971 if (const Stmt *InitStmt = For->getInit()) 1972 EmitStmt(InitStmt); 1973 BodyStmt = For->getBody(); 1974 } else if (const auto *RangeFor = 1975 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) { 1976 if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt()) 1977 EmitStmt(RangeStmt); 1978 if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt()) 1979 EmitStmt(BeginStmt); 1980 if (const DeclStmt *EndStmt = RangeFor->getEndStmt()) 1981 EmitStmt(EndStmt); 1982 if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt()) 1983 EmitStmt(LoopVarStmt); 1984 BodyStmt = RangeFor->getBody(); 1985 } else 1986 llvm_unreachable("Expected for-stmt or range-based for-stmt"); 1987 1988 // Emit closure for later use. By-value captures will be captured here. 
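// (Sketch of the contract: for 'for (int i = a; i < b; i += c)' the distance
// closure stores the trip count, roughly (b - a + c - 1) / c for a positive
// step 'c', into its out-parameter, and the loop-variable closure below maps
// a logical iteration number back to 'i = a + logical * c'.)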
1989 const CapturedStmt *DistanceFunc = S->getDistanceFunc(); 1990 EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc); 1991 const CapturedStmt *LoopVarFunc = S->getLoopVarFunc(); 1992 EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc); 1993 1994 // Call the distance function to get the number of iterations of the loop to 1995 // come. 1996 QualType LogicalTy = DistanceFunc->getCapturedDecl() 1997 ->getParam(0) 1998 ->getType() 1999 .getNonReferenceType(); 2000 Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr"); 2001 emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()}); 2002 llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count"); 2003 2004 // Emit the loop structure. 2005 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 2006 auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, 2007 llvm::Value *IndVar) { 2008 Builder.restoreIP(CodeGenIP); 2009 2010 // Emit the loop body: Convert the logical iteration number to the loop 2011 // variable and emit the body. 2012 const DeclRefExpr *LoopVarRef = S->getLoopVarRef(); 2013 LValue LCVal = EmitLValue(LoopVarRef); 2014 Address LoopVarAddress = LCVal.getAddress(*this); 2015 emitCapturedStmtCall(*this, LoopVarClosure, 2016 {LoopVarAddress.getPointer(), IndVar}); 2017 2018 RunCleanupsScope BodyScope(*this); 2019 EmitStmt(BodyStmt); 2020 }; 2021 llvm::CanonicalLoopInfo *CL = 2022 OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal); 2023 2024 // Finish up the loop. 2025 Builder.restoreIP(CL->getAfterIP()); 2026 ForScope.ForceCleanup(); 2027 2028 // Remember the CanonicalLoopInfo for parent AST nodes consuming it. 2029 OMPLoopNestStack.push_back(CL); 2030 } 2031 2032 void CodeGenFunction::EmitOMPInnerLoop( 2033 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond, 2034 const Expr *IncExpr, 2035 const llvm::function_ref<void(CodeGenFunction &)> BodyGen, 2036 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { 2037 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); 2038 2039 // Start the loop with a block that tests the condition. 2040 auto CondBlock = createBasicBlock("omp.inner.for.cond"); 2041 EmitBlock(CondBlock); 2042 const SourceRange R = S.getSourceRange(); 2043 2044 // If attributes are attached, push to the basic block with them. 2045 const auto &OMPED = cast<OMPExecutableDirective>(S); 2046 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt(); 2047 const Stmt *SS = ICS->getCapturedStmt(); 2048 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS); 2049 OMPLoopNestStack.clear(); 2050 if (AS) 2051 LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), 2052 AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()), 2053 SourceLocToDebugLoc(R.getEnd())); 2054 else 2055 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 2056 SourceLocToDebugLoc(R.getEnd())); 2057 2058 // If there are any cleanups between here and the loop-exit scope, 2059 // create a block to stage a loop exit along. 2060 llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); 2061 if (RequiresCleanup) 2062 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); 2063 2064 llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body"); 2065 2066 // Emit condition. 
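// (The CFG being assembled here is, in sketch form:
//   omp.inner.for.cond: br Cond, omp.inner.for.body, <exit/cleanup>
//   omp.inner.for.body: BODY; br omp.inner.for.inc
//   omp.inner.for.inc:  IV += 1; br omp.inner.for.cond)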
2067 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); 2068 if (ExitBlock != LoopExit.getBlock()) { 2069 EmitBlock(ExitBlock); 2070 EmitBranchThroughCleanup(LoopExit); 2071 } 2072 2073 EmitBlock(LoopBody); 2074 incrementProfileCounter(&S); 2075 2076 // Create a block for the increment. 2077 JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); 2078 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 2079 2080 BodyGen(*this); 2081 2082 // Emit "IV = IV + 1" and a back-edge to the condition block. 2083 EmitBlock(Continue.getBlock()); 2084 EmitIgnoredExpr(IncExpr); 2085 PostIncGen(*this); 2086 BreakContinueStack.pop_back(); 2087 EmitBranch(CondBlock); 2088 LoopStack.pop(); 2089 // Emit the fall-through block. 2090 EmitBlock(LoopExit.getBlock()); 2091 } 2092 2093 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { 2094 if (!HaveInsertPoint()) 2095 return false; 2096 // Emit inits for the linear variables. 2097 bool HasLinears = false; 2098 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 2099 for (const Expr *Init : C->inits()) { 2100 HasLinears = true; 2101 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); 2102 if (const auto *Ref = 2103 dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { 2104 AutoVarEmission Emission = EmitAutoVarAlloca(*VD); 2105 const auto *OrigVD = cast<VarDecl>(Ref->getDecl()); 2106 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), 2107 CapturedStmtInfo->lookup(OrigVD) != nullptr, 2108 VD->getInit()->getType(), VK_LValue, 2109 VD->getInit()->getExprLoc()); 2110 EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), 2111 VD->getType()), 2112 /*capturedByInit=*/false); 2113 EmitAutoVarCleanups(Emission); 2114 } else { 2115 EmitVarDecl(*VD); 2116 } 2117 } 2118 // Emit the linear steps for the linear clauses. 2119 // If a step is not constant, it is pre-calculated before the loop. 2120 if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep())) 2121 if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) { 2122 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); 2123 // Emit calculation of the linear step. 2124 EmitIgnoredExpr(CS); 2125 } 2126 } 2127 return HasLinears; 2128 } 2129 2130 void CodeGenFunction::EmitOMPLinearClauseFinal( 2131 const OMPLoopDirective &D, 2132 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { 2133 if (!HaveInsertPoint()) 2134 return; 2135 llvm::BasicBlock *DoneBB = nullptr; 2136 // Emit the final values of the linear variables. 2137 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 2138 auto IC = C->varlist_begin(); 2139 for (const Expr *F : C->finals()) { 2140 if (!DoneBB) { 2141 if (llvm::Value *Cond = CondGen(*this)) { 2142 // If the first post-update expression is found, emit conditional 2143 // block if it was requested. 
2144 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu"); 2145 DoneBB = createBasicBlock(".omp.linear.pu.done"); 2146 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 2147 EmitBlock(ThenBB); 2148 } 2149 } 2150 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); 2151 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), 2152 CapturedStmtInfo->lookup(OrigVD) != nullptr, 2153 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); 2154 Address OrigAddr = EmitLValue(&DRE).getAddress(*this); 2155 CodeGenFunction::OMPPrivateScope VarScope(*this); 2156 VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; }); 2157 (void)VarScope.Privatize(); 2158 EmitIgnoredExpr(F); 2159 ++IC; 2160 } 2161 if (const Expr *PostUpdate = C->getPostUpdateExpr()) 2162 EmitIgnoredExpr(PostUpdate); 2163 } 2164 if (DoneBB) 2165 EmitBlock(DoneBB, /*IsFinished=*/true); 2166 } 2167 2168 static void emitAlignedClause(CodeGenFunction &CGF, 2169 const OMPExecutableDirective &D) { 2170 if (!CGF.HaveInsertPoint()) 2171 return; 2172 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { 2173 llvm::APInt ClauseAlignment(64, 0); 2174 if (const Expr *AlignmentExpr = Clause->getAlignment()) { 2175 auto *AlignmentCI = 2176 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); 2177 ClauseAlignment = AlignmentCI->getValue(); 2178 } 2179 for (const Expr *E : Clause->varlists()) { 2180 llvm::APInt Alignment(ClauseAlignment); 2181 if (Alignment == 0) { 2182 // OpenMP [2.8.1, Description] 2183 // If no optional parameter is specified, implementation-defined default 2184 // alignments for SIMD instructions on the target platforms are assumed. 2185 Alignment = 2186 CGF.getContext() 2187 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( 2188 E->getType()->getPointeeType())) 2189 .getQuantity(); 2190 } 2191 assert((Alignment == 0 || Alignment.isPowerOf2()) && 2192 "alignment is not power of 2"); 2193 if (Alignment != 0) { 2194 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 2195 CGF.emitAlignmentAssumption( 2196 PtrValue, E, /*No second loc needed*/ SourceLocation(), 2197 llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment)); 2198 } 2199 } 2200 } 2201 } 2202 2203 void CodeGenFunction::EmitOMPPrivateLoopCounters( 2204 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { 2205 if (!HaveInsertPoint()) 2206 return; 2207 auto I = S.private_counters().begin(); 2208 for (const Expr *E : S.counters()) { 2209 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2210 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 2211 // Emit var without initialization. 2212 AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD); 2213 EmitAutoVarCleanups(VarEmission); 2214 LocalDeclMap.erase(PrivateVD); 2215 (void)LoopScope.addPrivate(VD, [&VarEmission]() { 2216 return VarEmission.getAllocatedAddress(); 2217 }); 2218 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || 2219 VD->hasGlobalStorage()) { 2220 (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() { 2221 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), 2222 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), 2223 E->getType(), VK_LValue, E->getExprLoc()); 2224 return EmitLValue(&DRE).getAddress(*this); 2225 }); 2226 } else { 2227 (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() { 2228 return VarEmission.getAllocatedAddress(); 2229 }); 2230 } 2231 ++I; 2232 } 2233 // Privatize extra loop counters used in loops for ordered(n) clauses. 
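// (E.g., with '#pragma omp for ordered(2)' on a two-deep nest where only the
// outer loop is collapsed, the inner loop's counter is such an extra counter
// and is privatized below; a sketch of the doacross setup.)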
2234 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) { 2235 if (!C->getNumForLoops()) 2236 continue; 2237 for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size(); 2238 I < E; ++I) { 2239 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I)); 2240 const auto *VD = cast<VarDecl>(DRE->getDecl()); 2241 // Override only those variables that can be captured to avoid re-emission 2242 // of the variables declared within the loops. 2243 if (DRE->refersToEnclosingVariableOrCapture()) { 2244 (void)LoopScope.addPrivate(VD, [this, DRE, VD]() { 2245 return CreateMemTemp(DRE->getType(), VD->getName()); 2246 }); 2247 } 2248 } 2249 } 2250 } 2251 2252 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 2253 const Expr *Cond, llvm::BasicBlock *TrueBlock, 2254 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 2255 if (!CGF.HaveInsertPoint()) 2256 return; 2257 { 2258 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 2259 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); 2260 (void)PreCondScope.Privatize(); 2261 // Get initial values of real counters. 2262 for (const Expr *I : S.inits()) { 2263 CGF.EmitIgnoredExpr(I); 2264 } 2265 } 2266 // Create temp loop control variables with their init values to support 2267 // non-rectangular loops. 2268 CodeGenFunction::OMPMapVars PreCondVars; 2269 for (const Expr *E : S.dependent_counters()) { 2270 if (!E) 2271 continue; 2272 assert(!E->getType().getNonReferenceType()->isRecordType() && 2273 "dependent counter must not be an iterator."); 2274 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2275 Address CounterAddr = 2276 CGF.CreateMemTemp(VD->getType().getNonReferenceType()); 2277 (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr); 2278 } 2279 (void)PreCondVars.apply(CGF); 2280 for (const Expr *E : S.dependent_inits()) { 2281 if (!E) 2282 continue; 2283 CGF.EmitIgnoredExpr(E); 2284 } 2285 // Check that the loop is executed at least once. 2286 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); 2287 PreCondVars.restore(CGF); 2288 } 2289 2290 void CodeGenFunction::EmitOMPLinearClause( 2291 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { 2292 if (!HaveInsertPoint()) 2293 return; 2294 llvm::DenseSet<const VarDecl *> SIMDLCVs; 2295 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 2296 const auto *LoopDirective = cast<OMPLoopDirective>(&D); 2297 for (const Expr *C : LoopDirective->counters()) { 2298 SIMDLCVs.insert( 2299 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 2300 } 2301 } 2302 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 2303 auto CurPrivate = C->privates().begin(); 2304 for (const Expr *E : C->varlists()) { 2305 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2306 const auto *PrivateVD = 2307 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); 2308 if (!SIMDLCVs.count(VD->getCanonicalDecl())) { 2309 bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() { 2310 // Emit private VarDecl with copy init. 2311 EmitVarDecl(*PrivateVD); 2312 return GetAddrOfLocalVar(PrivateVD); 2313 }); 2314 assert(IsRegistered && "linear var already registered as private"); 2315 // Silence the warning about unused variable.
2316 (void)IsRegistered; 2317 } else { 2318 EmitVarDecl(*PrivateVD); 2319 } 2320 ++CurPrivate; 2321 } 2322 } 2323 } 2324 2325 static void emitSimdlenSafelenClause(CodeGenFunction &CGF, 2326 const OMPExecutableDirective &D) { 2327 if (!CGF.HaveInsertPoint()) 2328 return; 2329 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { 2330 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 2331 /*ignoreResult=*/true); 2332 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 2333 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 2334 // In the presence of a finite 'safelen', it may be unsafe to mark all 2335 // the memory instructions parallel, because loop-carried 2336 // dependences of 'safelen' iterations are possible. 2337 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); 2338 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { 2339 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), 2340 /*ignoreResult=*/true); 2341 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 2342 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 2343 // In the presence of a finite 'safelen', it may be unsafe to mark all 2344 // the memory instructions parallel, because loop-carried 2345 // dependences of 'safelen' iterations are possible. 2346 CGF.LoopStack.setParallel(/*Enable=*/false); 2347 } 2348 } 2349 2350 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) { 2351 // Walk clauses and process safelen/simdlen and other vectorization hints. 2352 LoopStack.setParallel(/*Enable=*/true); 2353 LoopStack.setVectorizeEnable(); 2354 emitSimdlenSafelenClause(*this, D); 2355 if (const auto *C = D.getSingleClause<OMPOrderClause>()) 2356 if (C->getKind() == OMPC_ORDER_concurrent) 2357 LoopStack.setParallel(/*Enable=*/true); 2358 if ((D.getDirectiveKind() == OMPD_simd || 2359 (getLangOpts().OpenMPSimd && 2360 isOpenMPSimdDirective(D.getDirectiveKind()))) && 2361 llvm::any_of(D.getClausesOfKind<OMPReductionClause>(), 2362 [](const OMPReductionClause *C) { 2363 return C->getModifier() == OMPC_REDUCTION_inscan; 2364 })) 2365 // Disable parallel access in case of prefix sum. 2366 LoopStack.setParallel(/*Enable=*/false); 2367 } 2368 2369 void CodeGenFunction::EmitOMPSimdFinal( 2370 const OMPLoopDirective &D, 2371 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { 2372 if (!HaveInsertPoint()) 2373 return; 2374 llvm::BasicBlock *DoneBB = nullptr; 2375 auto IC = D.counters().begin(); 2376 auto IPC = D.private_counters().begin(); 2377 for (const Expr *F : D.finals()) { 2378 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); 2379 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl()); 2380 const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD); 2381 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) || 2382 OrigVD->hasGlobalStorage() || CED) { 2383 if (!DoneBB) { 2384 if (llvm::Value *Cond = CondGen(*this)) { 2385 // If the first post-update expression is found, emit conditional 2386 // block if it was requested.
2387 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then"); 2388 DoneBB = createBasicBlock(".omp.final.done"); 2389 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 2390 EmitBlock(ThenBB); 2391 } 2392 } 2393 Address OrigAddr = Address::invalid(); 2394 if (CED) { 2395 OrigAddr = 2396 EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this); 2397 } else { 2398 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD), 2399 /*RefersToEnclosingVariableOrCapture=*/false, 2400 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); 2401 OrigAddr = EmitLValue(&DRE).getAddress(*this); 2402 } 2403 OMPPrivateScope VarScope(*this); 2404 VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; }); 2405 (void)VarScope.Privatize(); 2406 EmitIgnoredExpr(F); 2407 } 2408 ++IC; 2409 ++IPC; 2410 } 2411 if (DoneBB) 2412 EmitBlock(DoneBB, /*IsFinished=*/true); 2413 } 2414 2415 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, 2416 const OMPLoopDirective &S, 2417 CodeGenFunction::JumpDest LoopExit) { 2418 CGF.EmitOMPLoopBody(S, LoopExit); 2419 CGF.EmitStopPoint(&S); 2420 } 2421 2422 /// Emit a helper variable and return corresponding lvalue. 2423 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 2424 const DeclRefExpr *Helper) { 2425 auto VDecl = cast<VarDecl>(Helper->getDecl()); 2426 CGF.EmitVarDecl(*VDecl); 2427 return CGF.EmitLValue(Helper); 2428 } 2429 2430 static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S, 2431 const RegionCodeGenTy &SimdInitGen, 2432 const RegionCodeGenTy &BodyCodeGen) { 2433 auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF, 2434 PrePostActionTy &) { 2435 CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S); 2436 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 2437 SimdInitGen(CGF); 2438 2439 BodyCodeGen(CGF); 2440 }; 2441 auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 2442 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 2443 CGF.LoopStack.setVectorizeEnable(/*Enable=*/false); 2444 2445 BodyCodeGen(CGF); 2446 }; 2447 const Expr *IfCond = nullptr; 2448 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2449 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2450 if (CGF.getLangOpts().OpenMP >= 50 && 2451 (C->getNameModifier() == OMPD_unknown || 2452 C->getNameModifier() == OMPD_simd)) { 2453 IfCond = C->getCondition(); 2454 break; 2455 } 2456 } 2457 } 2458 if (IfCond) { 2459 CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2460 } else { 2461 RegionCodeGenTy ThenRCG(ThenGen); 2462 ThenRCG(CGF); 2463 } 2464 } 2465 2466 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, 2467 PrePostActionTy &Action) { 2468 Action.Enter(CGF); 2469 assert(isOpenMPSimdDirective(S.getDirectiveKind()) && 2470 "Expected simd directive"); 2471 OMPLoopScope PreInitScope(CGF, S); 2472 // if (PreCond) { 2473 // for (IV in 0..LastIteration) BODY; 2474 // <Final counter/linear vars updates>; 2475 // } 2476 // 2477 if (isOpenMPDistributeDirective(S.getDirectiveKind()) || 2478 isOpenMPWorksharingDirective(S.getDirectiveKind()) || 2479 isOpenMPTaskLoopDirective(S.getDirectiveKind())) { 2480 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable())); 2481 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable())); 2482 } 2483 2484 // Emit: if (PreCond) - begin. 2485 // If the condition constant folds and can be elided, avoid emitting the 2486 // whole loop. 
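// (E.g., '#pragma omp simd' on 'for (int i = 0; i < 0; ++i)' constant-folds
// the precondition to false, so no code is emitted for the loop at all.)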
2487 bool CondConstant; 2488 llvm::BasicBlock *ContBlock = nullptr; 2489 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2490 if (!CondConstant) 2491 return; 2492 } else { 2493 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then"); 2494 ContBlock = CGF.createBasicBlock("simd.if.end"); 2495 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 2496 CGF.getProfileCount(&S)); 2497 CGF.EmitBlock(ThenBlock); 2498 CGF.incrementProfileCounter(&S); 2499 } 2500 2501 // Emit the loop iteration variable. 2502 const Expr *IVExpr = S.getIterationVariable(); 2503 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 2504 CGF.EmitVarDecl(*IVDecl); 2505 CGF.EmitIgnoredExpr(S.getInit()); 2506 2507 // Emit the iterations count variable. 2508 // If it is not a variable, Sema decided to calculate iterations count on 2509 // each iteration (e.g., it is foldable into a constant). 2510 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2511 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2512 // Emit calculation of the iterations count. 2513 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 2514 } 2515 2516 emitAlignedClause(CGF, S); 2517 (void)CGF.EmitOMPLinearClauseInit(S); 2518 { 2519 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2520 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 2521 CGF.EmitOMPLinearClause(S, LoopScope); 2522 CGF.EmitOMPPrivateClause(S, LoopScope); 2523 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2524 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( 2525 CGF, S, CGF.EmitLValue(S.getIterationVariable())); 2526 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2527 (void)LoopScope.Privatize(); 2528 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 2529 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 2530 2531 emitCommonSimdLoop( 2532 CGF, S, 2533 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2534 CGF.EmitOMPSimdInit(S); 2535 }, 2536 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 2537 CGF.EmitOMPInnerLoop( 2538 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), 2539 [&S](CodeGenFunction &CGF) { 2540 emitOMPLoopBodyWithStopPoint(CGF, S, 2541 CodeGenFunction::JumpDest()); 2542 }, 2543 [](CodeGenFunction &) {}); 2544 }); 2545 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; }); 2546 // Emit final copy of the lastprivate variables at the end of loops. 2547 if (HasLastprivateClause) 2548 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 2549 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); 2550 emitPostUpdateForReductionClause(CGF, S, 2551 [](CodeGenFunction &) { return nullptr; }); 2552 } 2553 CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; }); 2554 // Emit: if (PreCond) - end. 2555 if (ContBlock) { 2556 CGF.EmitBranch(ContBlock); 2557 CGF.EmitBlock(ContBlock, true); 2558 } 2559 } 2560 2561 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 2562 ParentLoopDirectiveForScanRegion ScanRegion(*this, S); 2563 OMPFirstScanLoop = true; 2564 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2565 emitOMPSimdRegion(CGF, S, Action); 2566 }; 2567 { 2568 auto LPCRegion = 2569 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 2570 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2571 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2572 } 2573 // Check for outer lastprivate conditional update. 
2574 checkForLastprivateConditionalUpdate(*this, S); 2575 } 2576 2577 void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) { 2578 // Emit the de-sugared statement. 2579 OMPTransformDirectiveScopeRAII TileScope(*this, &S); 2580 EmitStmt(S.getTransformedStmt()); 2581 } 2582 2583 void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { 2584 // This function is only called if the unrolled loop is not consumed by any 2585 // other loop-associated construct. Such a loop-associated construct will have 2586 // used the transformed AST. 2587 2588 // Set the unroll metadata for the next emitted loop. 2589 LoopStack.setUnrollState(LoopAttributes::Enable); 2590 2591 if (S.hasClausesOfKind<OMPFullClause>()) { 2592 LoopStack.setUnrollState(LoopAttributes::Full); 2593 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) { 2594 if (Expr *FactorExpr = PartialClause->getFactor()) { 2595 uint64_t Factor = 2596 FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue(); 2597 assert(Factor >= 1 && "Only positive factors are valid"); 2598 LoopStack.setUnrollCount(Factor); 2599 } 2600 } 2601 2602 EmitStmt(S.getAssociatedStmt()); 2603 } 2604 2605 void CodeGenFunction::EmitOMPOuterLoop( 2606 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 2607 CodeGenFunction::OMPPrivateScope &LoopScope, 2608 const CodeGenFunction::OMPLoopArguments &LoopArgs, 2609 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 2610 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { 2611 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2612 2613 const Expr *IVExpr = S.getIterationVariable(); 2614 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2615 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2616 2617 JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 2618 2619 // Start the loop with a block that tests the condition. 2620 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond"); 2621 EmitBlock(CondBlock); 2622 const SourceRange R = S.getSourceRange(); 2623 OMPLoopNestStack.clear(); 2624 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 2625 SourceLocToDebugLoc(R.getEnd())); 2626 2627 llvm::Value *BoolCondVal = nullptr; 2628 if (!DynamicOrOrdered) { 2629 // UB = min(UB, GlobalUB) or 2630 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 2631 // 'distribute parallel for') 2632 EmitIgnoredExpr(LoopArgs.EUB); 2633 // IV = LB 2634 EmitIgnoredExpr(LoopArgs.Init); 2635 // IV < UB 2636 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); 2637 } else { 2638 BoolCondVal = 2639 RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL, 2640 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); 2641 } 2642 2643 // If there are any cleanups between here and the loop-exit scope, 2644 // create a block to stage a loop exit along. 2645 llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); 2646 if (LoopScope.requiresCleanups()) 2647 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 2648 2649 llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body"); 2650 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 2651 if (ExitBlock != LoopExit.getBlock()) { 2652 EmitBlock(ExitBlock); 2653 EmitBranchThroughCleanup(LoopExit); 2654 } 2655 EmitBlock(LoopBody); 2656 2657 // Emit "IV = LB" (in case of static schedule, we have already calculated new 2658 // LB for loop condition and emitted it above). 
2659 if (DynamicOrOrdered) 2660 EmitIgnoredExpr(LoopArgs.Init); 2661 2662 // Create a block for the increment. 2663 JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 2664 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 2665 2666 emitCommonSimdLoop( 2667 *this, S, 2668 [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { 2669 // Generate !llvm.loop.parallel metadata for loads and stores for loops 2670 // with dynamic/guided scheduling and without ordered clause. 2671 if (!isOpenMPSimdDirective(S.getDirectiveKind())) { 2672 CGF.LoopStack.setParallel(!IsMonotonic); 2673 if (const auto *C = S.getSingleClause<OMPOrderClause>()) 2674 if (C->getKind() == OMPC_ORDER_concurrent) 2675 CGF.LoopStack.setParallel(/*Enable=*/true); 2676 } else { 2677 CGF.EmitOMPSimdInit(S); 2678 } 2679 }, 2680 [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, 2681 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 2682 SourceLocation Loc = S.getBeginLoc(); 2683 // when 'distribute' is not combined with a 'for': 2684 // while (idx <= UB) { BODY; ++idx; } 2685 // when 'distribute' is combined with a 'for' 2686 // (e.g. 'distribute parallel for') 2687 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 2688 CGF.EmitOMPInnerLoop( 2689 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, 2690 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 2691 CodeGenLoop(CGF, S, LoopExit); 2692 }, 2693 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { 2694 CodeGenOrdered(CGF, Loc, IVSize, IVSigned); 2695 }); 2696 }); 2697 2698 EmitBlock(Continue.getBlock()); 2699 BreakContinueStack.pop_back(); 2700 if (!DynamicOrOrdered) { 2701 // Emit "LB = LB + Stride", "UB = UB + Stride". 2702 EmitIgnoredExpr(LoopArgs.NextLB); 2703 EmitIgnoredExpr(LoopArgs.NextUB); 2704 } 2705 2706 EmitBranch(CondBlock); 2707 OMPLoopNestStack.clear(); 2708 LoopStack.pop(); 2709 // Emit the fall-through block. 2710 EmitBlock(LoopExit.getBlock()); 2711 2712 // Tell the runtime we are done. 2713 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { 2714 if (!DynamicOrOrdered) 2715 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 2716 S.getDirectiveKind()); 2717 }; 2718 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2719 } 2720 2721 void CodeGenFunction::EmitOMPForOuterLoop( 2722 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 2723 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 2724 const OMPLoopArguments &LoopArgs, 2725 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 2726 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2727 2728 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 2729 const bool DynamicOrOrdered = 2730 Ordered || RT.isDynamic(ScheduleKind.Schedule); 2731 2732 assert((Ordered || 2733 !RT.isStaticNonchunked(ScheduleKind.Schedule, 2734 LoopArgs.Chunk != nullptr)) && 2735 "static non-chunked schedule does not need outer loop"); 2736 2737 // Emit outer loop. 2738 // 2739 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2740 // When schedule(dynamic,chunk_size) is specified, the iterations are 2741 // distributed to threads in the team in chunks as the threads request them. 2742 // Each thread executes a chunk of iterations, then requests another chunk, 2743 // until no chunks remain to be distributed. Each chunk contains chunk_size 2744 // iterations, except for the last chunk to be distributed, which may have 2745 // fewer iterations. 
When no chunk_size is specified, it defaults to 1. 2746 // 2747 // When schedule(guided,chunk_size) is specified, the iterations are assigned 2748 // to threads in the team in chunks as the executing threads request them. 2749 // Each thread executes a chunk of iterations, then requests another chunk, 2750 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 2751 // each chunk is proportional to the number of unassigned iterations divided 2752 // by the number of threads in the team, decreasing to 1. For a chunk_size 2753 // with value k (greater than 1), the size of each chunk is determined in the 2754 // same way, with the restriction that the chunks do not contain fewer than k 2755 // iterations (except for the last chunk to be assigned, which may have fewer 2756 // than k iterations). 2757 // 2758 // When schedule(auto) is specified, the decision regarding scheduling is 2759 // delegated to the compiler and/or runtime system. The programmer gives the 2760 // implementation the freedom to choose any possible mapping of iterations to 2761 // threads in the team. 2762 // 2763 // When schedule(runtime) is specified, the decision regarding scheduling is 2764 // deferred until run time, and the schedule and chunk size are taken from the 2765 // run-sched-var ICV. If the ICV is set to auto, the schedule is 2766 // implementation defined 2767 // 2768 // while(__kmpc_dispatch_next(&LB, &UB)) { 2769 // idx = LB; 2770 // while (idx <= UB) { BODY; ++idx; 2771 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. 2772 // } // inner loop 2773 // } 2774 // 2775 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2776 // When schedule(static, chunk_size) is specified, iterations are divided into 2777 // chunks of size chunk_size, and the chunks are assigned to the threads in 2778 // the team in a round-robin fashion in the order of the thread number. 
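// A worked example (sketch): schedule(static, 4) over 10 iterations with two
// threads gives thread 0 the chunks [0..3] and [8..9] and thread 1 the chunk
// [4..7], assigned round-robin in order of thread number.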
2779 // 2780 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { 2781 // while (idx <= UB) { BODY; ++idx; } // inner loop 2782 // LB = LB + ST; 2783 // UB = UB + ST; 2784 // } 2785 // 2786 2787 const Expr *IVExpr = S.getIterationVariable(); 2788 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2789 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2790 2791 if (DynamicOrOrdered) { 2792 const std::pair<llvm::Value *, llvm::Value *> DispatchBounds = 2793 CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); 2794 llvm::Value *LBVal = DispatchBounds.first; 2795 llvm::Value *UBVal = DispatchBounds.second; 2796 CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal, 2797 LoopArgs.Chunk}; 2798 RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize, 2799 IVSigned, Ordered, DispatchRTInputValues); 2800 } else { 2801 CGOpenMPRuntime::StaticRTInput StaticInit( 2802 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, 2803 LoopArgs.ST, LoopArgs.Chunk); 2804 RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(), 2805 ScheduleKind, StaticInit); 2806 } 2807 2808 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, 2809 const unsigned IVSize, 2810 const bool IVSigned) { 2811 if (Ordered) { 2812 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, 2813 IVSigned); 2814 } 2815 }; 2816 2817 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, 2818 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); 2819 OuterLoopArgs.IncExpr = S.getInc(); 2820 OuterLoopArgs.Init = S.getInit(); 2821 OuterLoopArgs.Cond = S.getCond(); 2822 OuterLoopArgs.NextLB = S.getNextLowerBound(); 2823 OuterLoopArgs.NextUB = S.getNextUpperBound(); 2824 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs, 2825 emitOMPLoopBodyWithStopPoint, CodeGenOrdered); 2826 } 2827 2828 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, 2829 const unsigned IVSize, const bool IVSigned) {} 2830 2831 void CodeGenFunction::EmitOMPDistributeOuterLoop( 2832 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, 2833 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, 2834 const CodeGenLoopTy &CodeGenLoopContent) { 2835 2836 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2837 2838 // Emit outer loop. 2839 // Same behavior as an OMPForOuterLoop, except that the schedule cannot be 2840 // dynamic. 2841 // 2842 2843 const Expr *IVExpr = S.getIterationVariable(); 2844 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2845 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2846 2847 CGOpenMPRuntime::StaticRTInput StaticInit( 2848 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB, 2849 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk); 2850 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit); 2851 2852 // For combined 'distribute' and 'for', the increment expression of distribute 2853 // is stored in DistInc. For 'distribute' alone, it is in Inc.
2854 Expr *IncExpr; 2855 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) 2856 IncExpr = S.getDistInc(); 2857 else 2858 IncExpr = S.getInc(); 2859 2860 // This routine is shared by 'omp distribute parallel for' and 2861 // 'omp distribute': select the right EUB expression depending on the 2862 // directive. 2863 OMPLoopArguments OuterLoopArgs; 2864 OuterLoopArgs.LB = LoopArgs.LB; 2865 OuterLoopArgs.UB = LoopArgs.UB; 2866 OuterLoopArgs.ST = LoopArgs.ST; 2867 OuterLoopArgs.IL = LoopArgs.IL; 2868 OuterLoopArgs.Chunk = LoopArgs.Chunk; 2869 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2870 ? S.getCombinedEnsureUpperBound() 2871 : S.getEnsureUpperBound(); 2872 OuterLoopArgs.IncExpr = IncExpr; 2873 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2874 ? S.getCombinedInit() 2875 : S.getInit(); 2876 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2877 ? S.getCombinedCond() 2878 : S.getCond(); 2879 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2880 ? S.getCombinedNextLowerBound() 2881 : S.getNextLowerBound(); 2882 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2883 ? S.getCombinedNextUpperBound() 2884 : S.getNextUpperBound(); 2885 2886 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, 2887 LoopScope, OuterLoopArgs, CodeGenLoopContent, 2888 emitEmptyOrdered); 2889 } 2890 2891 static std::pair<LValue, LValue> 2892 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, 2893 const OMPExecutableDirective &S) { 2894 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2895 LValue LB = 2896 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2897 LValue UB = 2898 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2899 2900 // When composing 'distribute' with 'for' (e.g. as in 'distribute 2901 // parallel for') we need to use the 'distribute' 2902 // chunk lower and upper bounds rather than the whole loop iteration 2903 // space. These are parameters to the outlined function for 'parallel' 2904 // and we copy the bounds of the previous schedule into 2905 // the current ones. 2906 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable()); 2907 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable()); 2908 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar( 2909 PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc()); 2910 PrevLBVal = CGF.EmitScalarConversion( 2911 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(), 2912 LS.getIterationVariable()->getType(), 2913 LS.getPrevLowerBoundVariable()->getExprLoc()); 2914 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar( 2915 PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc()); 2916 PrevUBVal = CGF.EmitScalarConversion( 2917 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(), 2918 LS.getIterationVariable()->getType(), 2919 LS.getPrevUpperBoundVariable()->getExprLoc()); 2920 2921 CGF.EmitStoreOfScalar(PrevLBVal, LB); 2922 CGF.EmitStoreOfScalar(PrevUBVal, UB); 2923 2924 return {LB, UB}; 2925 } 2926 2927 /// If the 'for' loop has a dispatch schedule (e.g.

/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided), we need
/// to use the LB and UB expressions generated by the worksharing loop code
/// generation support; in non-combined situations we would just emit 0 and
/// the LastIteration expression.
/// This function is necessary due to the difference of the LB and UB types
/// for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // When implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop is not
  // normalized as each team only executes its own assigned distribute chunk.
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal =
      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  llvm::Value *UBVal =
      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  return {LBVal, UBVal};
}

static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  llvm::Value *LBCast =
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
                                CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));

  llvm::Value *UBCast =
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
                                CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}

static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &Action) {
    Action.Enter(CGF);
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S,
      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}
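
// In other words (a sketch of the lowering, not a literal trace): the
// 'distribute' loop deals a chunk [LB, UB] to each team, and the outlined
// 'parallel' region re-runs worksharing-loop codegen over that chunk as an
// inner OMPD_for (or OMPD_for_simd for the 'simd' variants of the combined
// directive).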

void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetSimdDirective &S) {
  // Emit target simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace

bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    bool Ordered = false;
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
      else
        Ordered = true;
    }

    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
          *this, S, EmitLValue(S.getIterationVariable()));
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the loop schedule kind and chunk.
      const Expr *ChunkExpr = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        ChunkExpr = C->getChunkSize();
      } else {
        // Default behaviour for schedule clause.
        CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
            *this, S, ScheduleKind.Schedule, ChunkExpr);
      }
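      // For instance (illustrative), 'schedule(dynamic, 4)' reaches this
      // point with ScheduleKind.Schedule == OMPC_SCHEDULE_dynamic and
      // ChunkExpr evaluating to 4, while a bare '#pragma omp for' takes the
      // default-schedule path above.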
      bool HasChunkSizeOne = false;
      llvm::Value *Chunk = nullptr;
      if (ChunkExpr) {
        Chunk = EmitScalarExpr(ChunkExpr);
        Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
                                     S.getIterationVariable()->getType(),
                                     S.getBeginLoc());
        Expr::EvalResult Result;
        if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
          llvm::APSInt EvaluatedChunk = Result.Val.getInt();
          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      bool StaticChunkedOne =
          RT.isStaticChunked(ScheduleKind.Schedule,
                             /* Chunked */ Chunk != nullptr) &&
          HasChunkSizeOne &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      bool IsMonotonic =
          Ordered ||
          (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
           !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
          ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
          ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
      if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 /* Chunked */ Chunk != nullptr) ||
           StaticChunkedOne) &&
          !Ordered) {
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind())) {
                CGF.EmitOMPSimdInit(S);
              } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
                if (C->getKind() == OMPC_ORDER_concurrent)
                  CGF.LoopStack.setParallel(/*Enable=*/true);
              }
            },
            [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
             &S, ScheduleKind, LoopExit,
             &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
              // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
              // When no chunk_size is specified, the iteration space is
              // divided into chunks that are approximately equal in size,
              // and at most one chunk is distributed to each thread. Note
              // that the size of the chunks is unspecified in this case.
              CGOpenMPRuntime::StaticRTInput StaticInit(
                  IVSize, IVSigned, Ordered, IL.getAddress(CGF),
                  LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
                  StaticChunkedOne ? Chunk : nullptr);
              CGF.CGM.getOpenMPRuntime().emitForStaticInit(
                  CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
                  StaticInit);
              // UB = min(UB, GlobalUB);
              if (!StaticChunkedOne)
                CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
              // IV = LB;
              CGF.EmitIgnoredExpr(S.getInit());
              // For unchunked static schedule generate:
              //
              // while (idx <= UB) {
              //   BODY;
              //   ++idx;
              // }
              //
              // For static schedule with chunk one:
              //
              // while (IV <= PrevUB) {
              //   BODY;
              //   IV += ST;
              // }
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(),
                  StaticChunkedOne ? S.getCombinedParForInDistCond()
                                   : S.getCond(),
                  StaticChunkedOne ? S.getDistInc() : S.getInc(),
                  [&S, LoopExit](CodeGenFunction &CGF) {
                    emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
                  },
                  [](CodeGenFunction &) {});
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                         S.getDirectiveKind());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // the runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments(
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            IL.getAddress(*this), Chunk, EUB);
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [IL, &S](CodeGenFunction &CGF) {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
    }
    EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, /*IsFinished=*/true);
    }
  }
  return HasLastprivateClause;
}

/// The following two functions generate expressions for the loop lower
/// and upper bounds in case of static and dynamic (dispatch) schedule
/// of the associated 'for' or 'distribute' loop.
static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  const auto &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  return {LB, UB};
}
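
// For example (illustrative): under 'schedule(static)' the addresses of the
// .omp.lb/.omp.ub helper variables emitted here are handed to the
// 'for_static_init' runtime entry, while the dispatch flavor below passes
// plain values, 0 and the iteration count, instead.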

/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
/// consider the lower and upper bound expressions generated by the
/// worksharing loop support; instead we use 0 and the iteration space size
/// as constants.
static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                          Address LB, Address UB) {
  const auto &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
  return {LBVal, UBVal};
}

/// Emits internal temp array declarations for the directive with inscan
/// reductions.
/// The code is the following:
/// \code
/// size num_iters = <num_iters>;
/// <type> buffer[num_iters];
/// \endcode
static void emitScanBasedDirectiveDecls(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
  }
  {
    // Emit buffers for each reduction variable.
    // ReductionCodeGen is required to emit the code for array reductions
    // correctly.
    ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
    unsigned Count = 0;
    auto *ITA = CopyArrayTemps.begin();
    for (const Expr *IRef : Privates) {
      const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      // Emit variably modified arrays, used for arrays/array sections
      // reductions.
      if (PrivateVD->getType()->isVariablyModifiedType()) {
        RedCG.emitSharedOrigLValue(CGF, Count);
        RedCG.emitAggregateType(CGF, Count);
      }
      CodeGenFunction::OpaqueValueMapping DimMapping(
          CGF,
          cast<OpaqueValueExpr>(
              cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
                  ->getSizeExpr()),
          RValue::get(OMPScanNumIterations));
      // Emit temp buffer.
      CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
      ++ITA;
      ++Count;
    }
  }
}
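
// As a usage sketch (illustrative; names are arbitrary), a loop that takes
// the inscan codegen path looks like:
//
// \code
// int red = 0;
// #pragma omp parallel for reduction(inscan, +: red)
// for (int i = 0; i < n; ++i) {
//   red += in[i];           // input phase
//   #pragma omp scan inclusive(red)
//   out[i] = red;           // scan phase
// }
// \endcode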

/// Emits the code for the directive with inscan reductions.
/// The code is the following:
/// \code
/// #pragma omp ...
/// for (i: 0..<num_iters>) {
///   <input phase>;
///   buffer[i] = red;
/// }
/// #pragma omp master // in parallel region
/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
///   for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
///     buffer[cnt] op= buffer[cnt-pow(2,k)];
/// #pragma omp barrier // in parallel region
/// #pragma omp ...
/// for (0..<num_iters>) {
///   red = InclusiveScan ? buffer[i] : buffer[i-1];
///   <scan phase>;
/// }
/// \endcode
static void emitScanBasedDirective(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
    llvm::function_ref<void(CodeGenFunction &)> FirstGen,
    llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
  {
    // Emit loop with input phase:
    // #pragma omp ...
    // for (i: 0..<num_iters>) {
    //   <input phase>;
    //   buffer[i] = red;
    // }
    CGF.OMPFirstScanLoop = true;
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    FirstGen(CGF);
  }
  // #pragma omp barrier // in parallel region
  auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
                    &ReductionOps,
                    &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Emit prefix reduction:
    // #pragma omp master // in parallel region
    // for (int k = 0; k != ceil(log2(n)); ++k)
    llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
    llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
    llvm::Function *F =
        CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
    llvm::Value *Arg =
        CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
    llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
    F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
    LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
    LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
    llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
        OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
    auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
    CGF.EmitBlock(LoopBB);
    auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
    // size pow2k = 1;
    auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
    Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
    // for (size i = n - 1; i >= 2 ^ k; --i)
    //   tmp[i] op= tmp[i-pow2k];
    llvm::BasicBlock *InnerLoopBB =
        CGF.createBasicBlock("omp.inner.log.scan.body");
    llvm::BasicBlock *InnerExitBB =
        CGF.createBasicBlock("omp.inner.log.scan.exit");
    llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
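    // What follows is, in effect, one pass of a Hillis-Steele-style parallel
    // prefix reduction over the temp buffer: on pass k, every element i with
    // i >= pow2k accumulates element i - pow2k. Walking i downwards from
    // n - 1 makes each pass read only values produced by the previous pass.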
    CGF.EmitBlock(InnerLoopBB);
    auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    IVal->addIncoming(NMin1, LoopBB);
    {
      CodeGenFunction::OMPPrivateScope PrivScope(CGF);
      auto *ILHS = LHSs.begin();
      auto *IRHS = RHSs.begin();
      for (const Expr *CopyArrayElem : CopyArrayElems) {
        const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
        Address LHSAddr = Address::invalid();
        {
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(IVal));
          LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
        }
        PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
        Address RHSAddr = Address::invalid();
        {
          llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(OffsetIVal));
          RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
        }
        PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
        ++ILHS;
        ++IRHS;
      }
      PrivScope.Privatize();
      CGF.CGM.getOpenMPRuntime().emitReduction(
          CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
    }
    llvm::Value *NextIVal =
        CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
    IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
    CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
    CGF.EmitBlock(InnerExitBB);
    llvm::Value *Next =
        CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
    Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
    // pow2k <<= 1;
    llvm::Value *NextPow2K =
        CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
    Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
    llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
    CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
    auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
    CGF.EmitBlock(ExitBB);
  };
  if (isOpenMPParallelDirective(S.getDirectiveKind())) {
    CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  } else {
    RegionCodeGenTy RCG(CodeGen);
    RCG(CGF);
  }

  CGF.OMPFirstScanLoop = false;
  SecondGen(CGF);
}

static bool emitWorksharingDirective(CodeGenFunction &CGF,
                                     const OMPLoopDirective &S,
                                     bool HasCancel) {
  bool HasLastprivates;
  if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                   [](const OMPReductionClause *C) {
                     return C->getModifier() == OMPC_REDUCTION_inscan;
                   })) {
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(S.getNumIterations());
    };
    const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(
          CGF, S.getDirectiveKind(), HasCancel);
      (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                       emitForLoopBounds,
                                       emitDispatchForLoopBounds);
      // Emit an implicit barrier at the end.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
                                                 OMPD_for);
    };
    const auto &&SecondGen = [&S, HasCancel,
                              &HasLastprivates](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(
          CGF, S.getDirectiveKind(), HasCancel);
      HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                   emitForLoopBounds,
                                                   emitDispatchForLoopBounds);
    };
    if (!isOpenMPParallelDirective(S.getDirectiveKind()))
      emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
    emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
  } else {
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  }
  return HasLastprivates;
}

static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
  if (S.hasCancel())
    return false;
  for (OMPClause *C : S.clauses())
    if (!isa<OMPNowaitClause>(C))
      return false;

  return true;
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  bool UseOMPIRBuilder =
      CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
  auto &&CodeGen = [this, &S, &HasLastprivates,
                    UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
    // Use the OpenMPIRBuilder if enabled.
    if (UseOMPIRBuilder) {
      // Emit the associated statement and get its loop representation.
      const Stmt *Inner = S.getRawStmt();
      llvm::CanonicalLoopInfo *CLI =
          EmitOMPCollapsedCanonicalLoopNest(Inner, 1);

      bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();
      llvm::OpenMPIRBuilder &OMPBuilder =
          CGM.getOpenMPRuntime().getOMPBuilder();
      llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
          AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
      OMPBuilder.createWorkshareLoop(Builder, CLI, AllocaIP, NeedsBarrier);
      return;
    }

    HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }

  if (!UseOMPIRBuilder) {
    // Emit an implicit barrier at the end.
    if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
  return LVal;
}

void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, CapturedStmt, CS,
                    &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
    const ASTContext &C = CGF.getContext();
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
                                         ? CGF.Builder.getInt32(CS->size() - 1)
                                         : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator *Cond = BinaryOperator::Create(
        C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
        S.getBeginLoc(), FPOptionsOverride());
    // Increment for loop counter.
    UnaryOperator *Inc = UnaryOperator::Create(
        C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
        S.getBeginLoc(), true, FPOptionsOverride());
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      // case 0:
      //   <SectionStmt[0]>;
      //   break;
      // ...
      // case <NumSection> - 1:
      //   <SectionStmt[<NumSection> - 1]>;
      //   break;
      // }
      // .omp.sections.exit:
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      llvm::SwitchInst *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (const Stmt *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(CapturedStmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of
      // lastprivate variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false,
        IL.getAddress(CGF), LB.getAddress(CGF), UB.getAddress(CGF),
        ST.getAddress(CGF));
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if the 'sections' directive has a
  // 'nowait' clause. Otherwise the barrier will be generated by the codegen
  // for the directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_unknown);
  }
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
    using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    const CapturedStmt *ICS = S.getInnermostCapturedStmt();
    const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
    const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
    llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
    if (CS) {
      for (const Stmt *SubStmt : CS->children()) {
        auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
                                         InsertPointTy CodeGenIP,
                                         llvm::BasicBlock &FiniBB) {
          OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP,
                                                         FiniBB);
          OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SubStmt, CodeGenIP,
                                                 FiniBB);
        };
        SectionCBVector.push_back(SectionCB);
      }
    } else {
      auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
                                            InsertPointTy CodeGenIP,
                                            llvm::BasicBlock &FiniBB) {
        OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
        OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CapturedStmt, CodeGenIP,
                                               FiniBB);
      };
      SectionCBVector.push_back(SectionCB);
    }

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    Builder.restoreIP(OMPBuilder.createSections(
        Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
        S.getSingleClause<OMPNowaitClause>()));
    return;
  }
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_sections);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                   InsertPointTy CodeGenIP,
                                                   llvm::BasicBlock &FiniBB) {
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SectionRegionBodyStmt,
                                             CodeGenIP, FiniBB);
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  EmitStmt(S.getAssociatedStmt());
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination> = <source> expressions).
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid a data race on firstprivate
  // init, or if no 'nowait' clause was specified and no 'copyprivate' clause).
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getBeginLoc(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getRawStmt());
  };
  CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP,
                                                  llvm::BasicBlock &FiniBB) {
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt,
                                             CodeGenIP, FiniBB);
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  emitMaster(*this, S);
}

static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getRawStmt());
  };
  Expr *Filter = nullptr;
  if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
    Filter = FilterClause->getThreadID();
  CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
                                              Filter);
}
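
// For reference (illustrative): with '#pragma omp masked filter(2)' the
// region is executed only by the thread whose thread id is 2; without a
// 'filter' clause the filter value defaults to 0, i.e. 'master' behavior.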

void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
    const Expr *Filter = nullptr;
    if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
      Filter = FilterClause->getThreadID();
    llvm::Value *FilterVal = Filter
                                 ? EmitScalarExpr(Filter, CGM.Int32Ty)
                                 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP,
                                                  llvm::BasicBlock &FiniBB) {
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MaskedRegionBodyStmt,
                                             CodeGenIP, FiniBB);
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(
        OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  emitMasked(*this, S);
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
    const Expr *Hint = nullptr;
    if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
      Hint = HintClause->getHint();

    // TODO: This is slightly different from what's currently being done in
    // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
    // about typing is final.
    llvm::Value *HintInst = nullptr;
    if (Hint)
      HintInst =
          Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                    InsertPointTy CodeGenIP,
                                                    llvm::BasicBlock &FiniBB) {
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt,
                                             CodeGenIP, FiniBB);
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createCritical(
        Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
        HintInst));

    return;
  }

  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getAssociatedStmt());
  };
  const Expr *Hint = nullptr;
  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getBeginLoc(), Hint);
}

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    (void)emitWorksharingDirective(CGF, S, S.hasCancel());
  };
  {
    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                     [](const OMPReductionClause *C) {
                       return C->getModifier() == OMPC_REDUCTION_inscan;
                     })) {
      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
        CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
        CGCapturedStmtInfo CGSI(CR_OpenMP);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
        OMPLoopScope LoopScope(CGF, S);
        return CGF.EmitScalarExpr(S.getNumIterations());
      };
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
    }
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for simd' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                     [](const OMPReductionClause *C) {
                       return C->getModifier() == OMPC_REDUCTION_inscan;
                     })) {
      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
        CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
        CGCapturedStmtInfo CGSI(CR_OpenMP);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
        OMPLoopScope LoopScope(CGF, S);
        return CGF.EmitScalarExpr(S.getNumIterations());
      };
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
    }
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelMasterDirective(
    const OMPParallelMasterDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'master' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // propagation of the master thread's values of threadprivate variables
      // to the local instances of those variables in all other implicit
      // threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    emitMaster(CGF, S);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitSections(S);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
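
// Background for the visitor below: an untied task may resume on a different
// thread after a task scheduling point, so locals declared in its body must
// live in task-private storage rather than on one thread's stack, e.g.
// (illustrative):
//
// \code
// #pragma omp task untied
// {
//   int local = compute();  // must be privatized in the task's storage
//   #pragma omp taskyield   // may resume on another thread
//   use(local);
// }
// \endcode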

namespace {
/// Get the list of variables declared in the context of the untied tasks.
class CheckVarsEscapingUntiedTaskDeclContext final
    : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
  llvm::SmallVector<const VarDecl *, 4> PrivateDecls;

public:
  explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
  virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
  void VisitDeclStmt(const DeclStmt *S) {
    if (!S)
      return;
    // Need to privatize only local vars; static locals can be processed as is.
    for (const Decl *D : S->decls()) {
      if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
        if (VD->hasLocalStorage())
          PrivateDecls.push_back(VD);
    }
  }
  void VisitOMPExecutableDirective(const OMPExecutableDirective *) { return; }
  void VisitCapturedStmt(const CapturedStmt *) { return; }
  void VisitLambdaExpr(const LambdaExpr *) { return; }
  void VisitBlockExpr(const BlockExpr *) { return; }
  void VisitStmt(const Stmt *S) {
    if (!S)
      return;
    for (const Stmt *Child : S->children())
      if (Child)
        Visit(Child);
  }

  /// Returns the list of private (local) variables found in the task body.
  ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
};
} // anonymous namespace

void CodeGenFunction::EmitOMPTaskBasedDirective(
    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
    OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  // Check if the task is final.
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    const Expr *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has a 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    const Expr *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied tasks).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
                         cast<DeclRefExpr>(*IRef)));
      ++IRef;
      ++ID;
    }
  }
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
    Data.ReductionOps.append(C->reduction_ops().begin(),
                             C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getBeginLoc(), LHSs, RHSs, Data);
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    OMPTaskDataTy::DependData &DD =
        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
  }
  // Get list of local vars for untied tasks.
  if (!Data.Tied) {
    CheckVarsEscapingUntiedTaskDeclContext Checker;
    Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
    Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
                              Checker.getPrivateDecls().end());
  }
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
                    CapturedRegion](CodeGenFunction &CGF,
                                    PrePostActionTy &Action) {
    llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                    std::pair<Address, Address>>
        UntiedLocalVars;
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
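      // The outlined task receives a pointer to the privates block plus a
      // 'copy function' that yields the address of each private copy. The
      // call built below has roughly this (illustrative) shape:
      //   copy_fn(privates, &a.priv.ptr.addr, &b.firstpriv.ptr.addr, ...);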
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(PrivatesPtr);
      ParamTypes.push_back(PrivatesPtr->getType());
      for (const Expr *E : Data.PrivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        FirstprivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const Expr *E : Data.LastprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const VarDecl *VD : Data.PrivateLocals) {
        QualType Ty = VD->getType().getNonReferenceType();
        if (VD->getType()->isLValueReferenceType())
          Ty = CGF.getContext().getPointerType(Ty);
        if (isAllocatableDecl(VD))
          Ty = CGF.getContext().getPointerType(Ty);
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
        auto Result = UntiedLocalVars.insert(
            std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
        // If the key already exists, update in place.
        if (!Result.second)
          *Result.first = std::make_pair(
              VD, std::make_pair(PrivatePtr, Address::invalid()));
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
                                               ParamTypes, /*isVarArg=*/false);
      CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CopyFn, CopyFnTy->getPointerTo());
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : LastprivateDstsOrigs) {
        const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        Pair.second->getType(), VK_LValue,
                        Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress(CGF);
        });
      }
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
      // Adjust mapping for internal locals by mapping actual memory instead of
      // a pointer to this memory.
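      // For an allocatable local the copy function hands back a pointer to a
      // pointer, so two loads are emitted below (illustrative):
      //   cell = load(.local.ptr.addr)   // allocator cell
      //   mem  = load(cell)              // actual storage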
      for (auto &Pair : UntiedLocalVars) {
        if (isAllocatableDecl(Pair.first)) {
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
          Address Replacement(Ptr, CGF.getPointerAlign());
          Pair.second.first = Replacement;
          Ptr = CGF.Builder.CreateLoad(Replacement);
          Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first));
          Pair.second.second = Replacement;
        } else {
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
          Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first));
          Pair.second.first = Replacement;
        }
      }
    }
    if (Data.Reductions) {
      OMPPrivateScope FirstprivateScope(CGF);
      for (const auto &Pair : FirstprivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        FirstprivateScope.addPrivate(Pair.first,
                                     [Replacement]() { return Replacement; });
      }
      (void)FirstprivateScope.Privatize();
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
                             Data.ReductionCopies, Data.ReductionOps);
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement =
            Address(CGF.EmitScalarConversion(
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                        CGF.getContext().getPointerType(
                            Data.ReductionCopies[Cnt]->getType()),
                        Data.ReductionCopies[Cnt]->getExprLoc()),
                    Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                         [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const Expr *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
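    // e.g. for the (illustrative) pair of directives
    //   #pragma omp taskgroup task_reduction(+: x)
    //   ... #pragma omp task in_reduction(+: x)
    // the task obtains its private copy of 'x' through the enclosing
    // taskgroup's reduction descriptor, which travels into the task as an
    // implicit firstprivate.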
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate and
        // privatized already during processing of the firstprivates.
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        llvm::Value *ReductionsPtr;
        if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
          ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
                                               TRExpr->getExprLoc());
        } else {
          ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
        }
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                InRedPrivs[Cnt]->getExprLoc()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
                              [Replacement]() { return Replacement; });
      }
    }
    (void)InRedScope.Privatize();

    CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
                                                             UntiedLocalVars);
    Action.Enter(CGF);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S, llvm::None,
                        !isOpenMPParallelDirective(S.getDirectiveKind()) &&
                            !isOpenMPSimdDirective(S.getDirectiveKind()));
  TaskGen(*this, OutlinedFn, Data);
}

static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                           ImplicitParamDecl::Other);
  auto *OrigRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                              ImplicitParamDecl::Other);
  auto *PrivateRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  QualType ElemType = C.getBaseElementType(Ty);
  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
                                           ImplicitParamDecl::Other);
  auto *InitRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
  PrivateVD->setInitStyle(VarDecl::CInit);
  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
                                              InitRef, /*BasePath=*/nullptr,
                                              VK_PRValue, FPOptionsOverride()));
  Data.FirstprivateVars.emplace_back(OrigRef);
  Data.FirstprivateCopies.emplace_back(PrivateRef);
  Data.FirstprivateInits.emplace_back(InitRef);
  return OrigVD;
}

void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
    OMPTargetDataInfo &InputInfo) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  OMPTaskDataTy Data;
  // The task is not final.
  Data.Final.setInt(/*IntVal=*/false);
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      Data.FirstprivateVars.push_back(*IRef);
      Data.FirstprivateCopies.push_back(IInit);
      Data.FirstprivateInits.push_back(*IElemInitRef);
      ++IRef;
      ++IElemInitRef;
    }
  }
  OMPPrivateScope TargetScope(*this);
  VarDecl *BPVD = nullptr;
  VarDecl *PVD = nullptr;
  VarDecl *SVD = nullptr;
  VarDecl *MVD = nullptr;
  if (InputInfo.NumberOfTargetItems > 0) {
    auto *CD = CapturedDecl::Create(
        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
    QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
        getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    BPVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
    PVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
    QualType SizesType = getContext().getConstantArrayType(
        getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
        ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
                                            S.getBeginLoc());
    TargetScope.addPrivate(
        BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
    TargetScope.addPrivate(PVD,
                           [&InputInfo]() { return InputInfo.PointersArray; });
    TargetScope.addPrivate(SVD,
                           [&InputInfo]() { return InputInfo.SizesArray; });
    // If there is no user-defined mapper, the mapper array will be nullptr. In
    // this case, we don't need to privatize it.
    if (!isa_and_nonnull<llvm::ConstantPointerNull>(
            InputInfo.MappersArray.getPointer())) {
      MVD = createImplicitFirstprivateForType(
          getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
      TargetScope.addPrivate(MVD,
                             [&InputInfo]() { return InputInfo.MappersArray; });
    }
  }
  (void)TargetScope.Privatize();
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    OMPTaskDataTy::DependData &DD =
        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
  }
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
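    // Same mapping protocol as in EmitOMPTaskBasedDirective above: call the
    // generated 'copy function' to recover the addresses of the private
    // copies, here only for the (implicit) firstprivates.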
    OMPPrivateScope Scope(CGF);
    if (!Data.FirstprivateVars.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(PrivatesPtr);
      ParamTypes.push_back(PrivatesPtr->getType());
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
                                               ParamTypes, /*isVarArg=*/false);
      CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CopyFn, CopyFnTy->getPointerTo());
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    if (InputInfo.NumberOfTargetItems > 0) {
      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
      // If MVD is nullptr, the mapper array is not privatized.
      if (MVD)
        InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
            CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
    }

    Action.Enter(CGF);
    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
      Data.NumberOfParts);
  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
  IntegerLiteral IfCond(getContext(), TrueOrFalse,
                        getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
                        SourceLocation());

  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
                                      SharedsTy, CapturedStruct, &IfCond, Data);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
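  // The task becomes an outlined function plus runtime calls; the sequence
  // produced by emitTaskCall has roughly this shape on the host (sketch, not
  // literal IR; the exact entry points are owned by CGOpenMPRuntime):
  //   %task = call @__kmpc_omp_task_alloc(...)
  //   ... copy shareds/privates into %task ...
  //   call @__kmpc_omp_task(ident, tid, %task)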
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit a tied or untied task.
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
        Data.ReductionOps.append(C->reduction_ops().begin(),
                                 C->reduction_ops().end());
        LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
        RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
                                                           LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
                                ? llvm::AtomicOrdering::NotAtomic
                                : llvm::AtomicOrdering::AcquireRelease;
  CGM.getOpenMPRuntime().emitFlush(
      *this,
      [&S]() -> ArrayRef<const Expr *> {
        if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
          return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                    FlushClause->varlist_end());
        return llvm::None;
      }(),
      S.getBeginLoc(), AO);
}

void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
  const auto *DO = S.getSingleClause<OMPDepobjClause>();
  LValue DOLVal = EmitLValue(DO->getDepobj());
  if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
    OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
                                           DC->getModifier());
    Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
    Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
        *this, Dependencies, DC->getBeginLoc());
    EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
    return;
  }
  if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
    CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
    return;
  }
  if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
    CGM.getOpenMPRuntime().emitUpdateClause(
        *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
    return;
  }
}

void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
  if (!OMPParentLoopDirectiveForScan)
    return;
  const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
  bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
    if (C->getModifier() != OMPC_REDUCTION_inscan)
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  if (ParentDir.getDirectiveKind() == OMPD_simd ||
      (getLangOpts().OpenMPSimd &&
       isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
    // For simd directive and simd-based directives in simd only mode, use the
    // following codegen:
    //  int x = 0;
    //  #pragma omp simd reduction(inscan, +: x)
    //  for (..) {
    //    <first part>
    //    #pragma omp scan inclusive(x)
    //    <second part>
    //  }
    // is transformed to:
    //  int x = 0;
    //  for (..) {
    //    int x_priv = 0;
    //    <first part>
    //    x = x_priv + x;
    //    x_priv = x;
    //    <second part>
    //  }
    // and
    //  int x = 0;
    //  #pragma omp simd reduction(inscan, +: x)
    //  for (..) {
    //    <first part>
    //    #pragma omp scan exclusive(x)
    //    <second part>
    //  }
    // to:
    //  int x = 0;
    //  for (..) {
    //    int x_priv = 0;
    //    <second part>
    //    int temp = x;
    //    x = x_priv + x;
    //    x_priv = temp;
    //    <first part>
    //  }
    llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
    EmitBranch(IsInclusive
                   ? OMPScanReduce
                   : BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(OMPScanDispatch);
    {
      // New scope for correct construction/destruction of temp variables for
      // exclusive scan.
      LexicalScope Scope(*this, S.getSourceRange());
      EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
      EmitBlock(OMPScanReduce);
      if (!IsInclusive) {
        // Create temp var and copy LHS value to this temp value.
        // TMP = LHS;
        for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
          const Expr *PrivateExpr = Privates[I];
          const Expr *TempExpr = CopyArrayTemps[I];
          EmitAutoVarDecl(
              *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
          LValue DestLVal = EmitLValue(TempExpr);
          LValue SrcLVal = EmitLValue(LHSs[I]);
          EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                      SrcLVal.getAddress(*this),
                      cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                      cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                      CopyOps[I]);
        }
      }
      CGM.getOpenMPRuntime().emitReduction(
          *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
      for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
        const Expr *PrivateExpr = Privates[I];
        LValue DestLVal;
        LValue SrcLVal;
        if (IsInclusive) {
          DestLVal = EmitLValue(RHSs[I]);
          SrcLVal = EmitLValue(LHSs[I]);
        } else {
          const Expr *TempExpr = CopyArrayTemps[I];
          DestLVal = EmitLValue(RHSs[I]);
          SrcLVal = EmitLValue(TempExpr);
        }
        EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                    SrcLVal.getAddress(*this),
                    cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                    cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                    CopyOps[I]);
      }
    }
    EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
    OMPScanExitBlock = IsInclusive
                           ? BreakContinueStack.back().ContinueBlock.getBlock()
                           : OMPScanReduce;
    EmitBlock(OMPAfterScanBlock);
    return;
  }
  if (!IsInclusive) {
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(OMPScanExitBlock);
  }
  if (OMPFirstScanLoop) {
    // Emit buffer[i] = red; at the end of the input phase.
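    // Illustrative (scalar '+' inscan reduction): the first (input) loop pass
    // stores each iteration's private value into the temporary copy array,
    //   buffer[i] = x_priv;
    // so the second (scan) pass can later read the prefix values back out.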
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue DestLVal = EmitLValue(CopyArrayElem);
      LValue SrcLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                  SrcLVal.getAddress(*this),
                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                  CopyOps[I]);
    }
  }
  EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  if (IsInclusive) {
    EmitBlock(OMPScanExitBlock);
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  }
  EmitBlock(OMPScanDispatch);
  if (!OMPFirstScanLoop) {
    // Emit red = buffer[i]; at the entrance to the scan phase.
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    llvm::BasicBlock *ExclusiveExitBB = nullptr;
    if (!IsInclusive) {
      llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
      ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
      llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
      Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
      EmitBlock(ContBB);
      // Use the idx - 1 iteration for exclusive scan.
      IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
    }
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue SrcLVal = EmitLValue(CopyArrayElem);
      LValue DestLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                  SrcLVal.getAddress(*this),
                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                  CopyOps[I]);
    }
    if (!IsInclusive) {
      EmitBlock(ExclusiveExitBB);
    }
  }
  EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
                                               : OMPAfterScanBlock);
  EmitBlock(OMPAfterScanBlock);
}

void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iteration count variable.
  // If it is not a variable, Sema decided to calculate the iteration count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iteration count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    // Emit 'then' code.
    {
      // Emit inits for the helper vars.
      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind()))
        EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const Expr *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getBeginLoc());
        }
      } else {
        // Default behaviour for the dist_schedule clause.
        CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
            *this, S, ScheduleKind, Chunk);
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      bool StaticChunked =
          RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr) ||
          StaticChunked) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            StaticChunked ? Chunk : nullptr);
        RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                    StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        const Expr *Cond =
            isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                ? S.getCombinedCond()
                : S.getCond();

        if (StaticChunked)
          Cond = S.getCombinedDistCond();

        // For static unchunked schedules generate:
        //
        //  1. For distribute alone, codegen
        //    while (idx <= UB) {
        //      BODY;
        //      ++idx;
        //    }
        //
        //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
        //    while (idx <= UB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      idx += ST;
        //    }
        //
        // For static chunked schedules generate:
        //
        //  while (IV <= GlobalUB) {
        //    <CodeGen rest of pragma>(LB, UB);
        //    LB += ST;
        //    UB += ST;
        //    UB = min(UB, GlobalUB);
        //    IV = LB;
        //  }
        //
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind()))
                CGF.EmitOMPSimdInit(S);
            },
            [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
             StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(), Cond, IncExpr,
                  [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                    CodeGenLoop(CGF, S, LoopExit);
                  },
                  [&S, StaticChunked](CodeGenFunction &CGF) {
                    if (StaticChunked) {
                      CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
                      CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedInit());
                    }
                  });
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
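        // On the host this typically lowers to a '__kmpc_for_static_fini'
        // runtime call (illustrative; the exact entry point is owned by
        // CGOpenMPRuntime).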
        RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from the
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            IL.getAddress(*this), Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind())) {
        EmitOMPReductionClauseFinal(S, OMPD_simd);
        // Emit post-update of the reduction variables if IsLastIter != 0.
        emitPostUpdateForReductionClause(
            *this, S, [IL, &S](CodeGenFunction &CGF) {
              return CGF.Builder.CreateIsNotNull(
                  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
            });
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}

void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S,
                                                   SourceLocation Loc) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
  Fn->setDoesNotRecurse();
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "An 'ordered' construct with a 'depend' clause must not have an "
           "associated statement.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      llvm::Function *OutlinedFn =
          emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(),
                                           !C);
}

static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                                   DestType, Loc)
                        : CGF.EmitComplexToScalarConversion(
                              Val.getComplexVal(), SrcType, DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    llvm::Value *ScalarVal = CGF.EmitScalarConversion(
        Val.getScalarVal(), SrcType, DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg())
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  else
    CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
}

static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, LValue LVal,
                                   SourceLocation Loc) {
  if (LVal.isGlobalReg())
    return CGF.EmitLoadOfLValue(LVal, Loc);
  return CGF.EmitAtomicLoad(
      LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
      LVal.isVolatile());
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
  // OpenMP, 2.17.7, atomic Construct
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::Monotonic:
  case llvm::AtomicOrdering::Release:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
}

static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, const Expr *X,
                                   const Expr *E, SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomics are supported for the given
  // type on the target platform.
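  // For example, the (illustrative) construct
  //   #pragma omp atomic
  //   x += 1;
  // with integer 'x' can then be emitted as a single 'atomicrmw add' instead
  // of a compare-and-swap loop.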
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress(CGF).getElementType())) ||
      !X.getAddress(CGF).getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress(CGF).getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  llvm::Value *Res =
      CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> CommonGen) {
  // Update expressions are allowed to have the following forms:
  //  x binop= expr;     -> xrval binop expr;
  //  x++, ++x           -> xrval + 1;
  //  x--, --x           -> xrval - 1;
  //  x = x binop expr;  -> xrval binop expr
  //  x = expr Op x;     -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform the compare-and-swap procedure.
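      // Illustrative shape of the emitted sequence (not literal IR):
      //   old = atomic load x
      //   do {
      //     desired = CommonGen(old)   // e.g. old * expr
      //   } while (!compare_exchange(x, old, desired));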
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
                                    llvm::AtomicOrdering AO, const Expr *X,
                                    const Expr *E, const Expr *UE,
                                    bool IsXLHSInRHSPart, SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  //  x binop= expr;     -> xrval binop expr;
  //  x++, ++x           -> xrval + 1;
  //  x--, --x           -> xrval - 1;
  //  x = x binop expr;  -> xrval binop expr
  //  x = expr Op x;     -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
                                     llvm::AtomicOrdering AO,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    //  x binop= expr;     -> xrval binop expr;
    //  x++, ++x           -> xrval + 1;
    //  x--, --x           -> xrval - 1;
    //  x = x binop expr;  -> xrval binop expr
    //  x = expr Op x;     -> expr binop xrval;
    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use the old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide the new value, so evaluate it using the
        // old value of 'x'.
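        // e.g. for the (illustrative) capture 'v = ++x', 'atomicrmw add'
        // returns the old 'x', so the captured value is recomputed by
        // re-evaluating the update expression with that old value.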
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::AcquireRelease);
    break;
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              llvm::AtomicOrdering AO, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
    break;
  case OMPC_write:
    emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_sizes:
  case OMPC_full:
  case OMPC_partial:
  case OMPC_allocator:
  case OMPC_allocate:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_acq_rel:
  case OMPC_acquire:
  case OMPC_release:
  case OMPC_relaxed:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_depobj:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_use_device_addr:
  case OMPC_is_device_ptr:
  case OMPC_unified_address:
  case OMPC_unified_shared_memory:
  case OMPC_reverse_offload:
  case OMPC_dynamic_allocators:
  case OMPC_atomic_default_mem_order:
  case OMPC_device_type:
  case OMPC_match:
  case OMPC_nontemporal:
  case OMPC_order:
  case OMPC_destroy:
  case OMPC_detach:
  case OMPC_inclusive:
  case OMPC_exclusive:
  case OMPC_uses_allocators:
  case OMPC_affinity:
  case OMPC_init:
  case OMPC_inbranch:
  case OMPC_notinbranch:
  case OMPC_link:
  case OMPC_use:
  case OMPC_novariants:
  case OMPC_nocontext:
  case OMPC_filter:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
  bool MemOrderingSpecified = false;
  if (S.getSingleClause<OMPSeqCstClause>()) {
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcqRelClause>()) {
    AO = llvm::AtomicOrdering::AcquireRelease;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcquireClause>()) {
    AO = llvm::AtomicOrdering::Acquire;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPReleaseClause>()) {
    AO = llvm::AtomicOrdering::Release;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPRelaxedClause>()) {
    AO = llvm::AtomicOrdering::Monotonic;
    MemOrderingSpecified = true;
  }
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find first clause (skip seq_cst|acq_rel|acquire|release|relaxed clause,
    // if it is first).
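    // For example (illustrative), '#pragma omp atomic capture seq_cst' has
    // the clauses [capture, seq_cst]; 'capture' is the first non-ordering
    // clause and becomes Kind below.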
    if (C->getClauseKind() != OMPC_seq_cst &&
        C->getClauseKind() != OMPC_acq_rel &&
        C->getClauseKind() != OMPC_acquire &&
        C->getClauseKind() != OMPC_release &&
        C->getClauseKind() != OMPC_relaxed && C->getClauseKind() != OMPC_hint) {
      Kind = C->getClauseKind();
      break;
    }
  }
  if (!MemOrderingSpecified) {
    llvm::AtomicOrdering DefaultOrder =
        CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
    if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
        DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
        (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
         Kind == OMPC_capture)) {
      AO = DefaultOrder;
    } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
      if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
        AO = llvm::AtomicOrdering::Release;
      } else if (Kind == OMPC_read) {
        assert(Kind == OMPC_read && "Unexpected atomic kind.");
        AO = llvm::AtomicOrdering::Acquire;
      }
    }
  }

  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S.getAssociatedStmt());
  emitOMPAtomicExpr(*this, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
                    S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(),
                    S.getBeginLoc());
}

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // On device emit this construct as inlined code.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
        });
    return;
  }

  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for at most one 'if' clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
      nullptr, OMPC_DEVICE_unknown);
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device.setPointerAndInt(C->getDevice(), C->getModifier());

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
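  // For instance (illustrative, Itanium ABI): a target region inside the
  // constructor 'S::S()' is keyed to the mangled name of the complete object
  // constructor, '_ZN1SC1Ev', so host and device agree on the entry name.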
  if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  auto &&SizeEmitter =
      [IsOffloadEntry](CodeGenFunction &CGF,
                       const OMPLoopDirective &D) -> llvm::Value * {
    if (IsOffloadEntry) {
      // NB: named so the scope (and its cleanups) outlives the emission of
      // the iteration count below, rather than being destroyed immediately.
      OMPLoopScope LoopScope(CGF, D);
      // Emit calculation of the iterations count.
      llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
      NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
                                                /*isSigned=*/false);
      return NumIterations;
    }
    return nullptr;
  };
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        SizeEmitter);
}

static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
  CGF.EnsureInsertPoint();
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsRegion(CodeGenFunction &CGF,
                                  PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
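  // A minimal construct reaching this path (illustrative only):
  //   #pragma omp target teams distribute
  //   for (int i = 0; i < N; ++i)
  //     a[i] = b[i] + c[i];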
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
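  // A construct taking this path could be (illustrative only):
  //   #pragma omp teams distribute parallel for reduction(+ : sum)
  //   for (int i = 0; i < N; ++i)
  //     sum += a[i];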
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
                              CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  // Emit SPMD target teams distribute parallel for region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  // Emit SPMD target teams distribute parallel for simd region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // TODO: This check is necessary as we only generate `omp parallel` through
    // the OpenMPIRBuilder for now.
    if (S.getCancelRegion() == OMPD_parallel ||
        S.getCancelRegion() == OMPD_sections ||
        S.getCancelRegion() == OMPD_section) {
      llvm::Value *IfCondition = nullptr;
      if (IfCond)
        IfCondition = EmitScalarExpr(IfCond,
                                     /*IgnoreResultAssign=*/true);
      return Builder.restoreIP(
          OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
    }
  }

  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
      Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}

void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (const Expr *PvtVarIt : C.private_copies()) {
    const auto *OrigVD =
        cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
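    // For reference (illustrative only), a typical use of this clause:
    //   #pragma omp target data map(to : p[0 : n]) use_device_ptr(p)
    //   device_api(p); // inside the region 'p' holds the device address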
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privatize fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(
        OrigVD, [this, OrigVD, InitAddrIt, InitVD, PvtVD]() {
          // Initialize the temporary initialization variable with the address
          // we get from the runtime library. We have to cast the source
          // address because it is always a void *. References are
          // materialized in the privatization scope, so the initialization
          // here disregards the fact the original variable is a reference.
          QualType AddrQTy = getContext().getPointerType(
              OrigVD->getType().getNonReferenceType());
          llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
          Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
          setAddrOfLocalVar(InitVD, InitAddr);

          // Emit the private declaration; it will be initialized by the
          // variable declaration we just added to the local declarations map.
          EmitDecl(*PvtVD);

          // The initialization variable has served its purpose in the
          // emission of the previous declaration, so we don't need it
          // anymore.
          LocalDeclMap.erase(InitVD);

          // Return the address of the private variable.
          return GetAddrOfLocalVar(PvtVD);
        });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}

static const VarDecl *getBaseDecl(const Expr *Ref) {
  const Expr *Base = Ref->IgnoreParenImpCasts();
  while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
    Base = OASE->getBase()->IgnoreParenImpCasts();
  while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
    Base = ASE->getBase()->IgnoreParenImpCasts();
  return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
}

void CodeGenFunction::EmitOMPUseDeviceAddrClause(
    const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const Expr *Ref : C.varlists()) {
    const VarDecl *OrigVD = getBaseDecl(Ref);
    if (!Processed.insert(OrigVD).second)
      continue;
    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privatize fields of the current
      // structure.
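      // For instance (illustrative), 'use_device_addr(p)' inside a member
      // function, where 'p' is a field, is captured via 'this->p'; the
      // declaration to match is the field itself.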
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    Address PrivAddr = InitAddrIt->getSecond();
    // For declrefs and variable-length arrays we need to load the pointer for
    // correct mapping, since the pointer to the data was passed to the
    // runtime.
    if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
        MatchingVD->getType()->isArrayType())
      PrivAddr =
          EmitLoadOfPointer(PrivAddr, getContext()
                                          .getPointerType(OrigVD->getType())
                                          ->castAs<PointerType>());
    llvm::Type *RealTy =
        ConvertTypeForMem(OrigVD->getType().getNonReferenceType())
            ->getPointerTo();
    PrivAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy);

    (void)PrivateScope.addPrivate(OrigVD, [PrivAddr]() { return PrivAddr; });
  }
}

// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
                                       /*SeparateBeginEndCalls=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code
  // generation to deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers
      // if needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
          CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
                                         Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        OMPLexicalScope Scope(CGF, S, OMPD_unknown);
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive,
    // we don't use an inline scope: changes to the references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond,
                                                      Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond,
                                                      Device);
}

static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
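  // A minimal construct reaching this path (illustrative only):
  //   #pragma omp target parallel firstprivate(x)
  //   { work(x); }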
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

/// Map a loop-bound helper variable to the corresponding implicit parameter
/// of the outlined function.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  const auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(VDecl,
                      [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
}

void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  Address CapturedStruct = Address::invalid();
  {
    OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
    CapturedStruct = GenerateCapturedStmtArgument(*CS);
  }
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
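  // For the scheduling clauses handled just below (illustrative mapping):
  //   #pragma omp taskloop grainsize(G) -> Schedule = {G expr, IntVal=false}
  //   #pragma omp taskloop num_tasks(N) -> Schedule = {N expr, IntVal=true}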
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    (void)CGF.EmitOMPLinearClauseInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    {
      OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
      emitCommonSimdLoop(
          CGF, S,
          [&S](CodeGenFunction &CGF, PrePostActionTy &) {
            if (isOpenMPSimdDirective(S.getDirectiveKind()))
              CGF.EmitOMPSimdInit(S);
          },
          [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
            CGF.EmitOMPInnerLoop(
                S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                [&S](CodeGenFunction &CGF) {
                  emitOMPLoopBodyWithStopPoint(CGF, S,
                                               CodeGenFunction::JumpDest());
                },
                [](CodeGenFunction &) {});
          });
    }
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getBeginLoc())));
    }
    CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
                               (*LIP)->getType(), S.getBeginLoc()));
    });
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
                      &Data](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getBeginLoc());
  }
}

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
    const OMPMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
    const OMPMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
    const OMPParallelMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
    const OMPParallelMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond,
                                                      Device);
}

void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
    EmitOMPScanDirective(*SD);
    return;
  }
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    OMPPrivateScope GlobalsScope(CGF);
    if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
      // Capture global firstprivates to avoid crash.
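      // For example (illustrative only), with a global 'g':
      //   int g;
      //   #pragma omp taskloop firstprivate(g)
      //   for (int i = 0; i < 10; ++i)
      //     use(g);
      // 'g' has no local copy yet, so its address is registered below.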
      for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
        for (const Expr *Ref : C->varlists()) {
          // Use dyn_cast so the following null check is meaningful.
          const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
          if (!DRE)
            continue;
          const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
          if (!VD || VD->hasLocalStorage())
            continue;
          if (!CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(Ref);
            GlobalsScope.addPrivate(
                VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
          }
        }
      }
    }
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      (void)GlobalsScope.Privatize();
      ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const Expr *E : LD->counters()) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(E);
            GlobalsScope.addPrivate(
                VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
          }
          if (isa<OMPCapturedExprDecl>(VD)) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
          if (!C->getNumForLoops())
            continue;
          for (unsigned I = LD->getLoopsNumber(),
                        E = C->getLoopNumIterations().size();
               I < E; ++I) {
            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
              // Emit only those that were not explicitly referenced in
              // clauses.
              if (!CGF.LocalDeclMap.count(VD))
                CGF.EmitVarDecl(*VD);
            }
          }
        }
      }
      (void)GlobalsScope.Privatize();
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  if (D.getDirectiveKind() == OMPD_atomic ||
      D.getDirectiveKind() == OMPD_critical ||
      D.getDirectiveKind() == OMPD_section ||
      D.getDirectiveKind() == OMPD_master ||
      D.getDirectiveKind() == OMPD_masked) {
    EmitStmt(D.getAssociatedStmt());
  } else {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
    OMPSimdLexicalScope Scope(*this, D);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        *this,
        isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                    : D.getDirectiveKind(),
        CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, D);
}