//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AtomicOrdering.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

static const VarDecl *getBaseDecl(const Expr *Ref);

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) ||
                (CGF.CapturedStmtInfo &&
                 InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of used expressions from the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const DeclStmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address::deprecated(
                    llvm::UndefValue::get(
                        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(
                            OrigVD->getType().getNonReferenceType()))),
                    CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
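      // Illustrative example (not from the original comments): for a loop
      // such as
      //   for (int x : vec) { ... }
      // under a loop directive, the implicit __range, __begin and __end
      // variables must be emitted here so that the loop bounds computed below
      // can refer to them.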
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they are
    // not used in simd only mode.
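    // Illustrative note: for a scan reduction such as
    //   #pragma omp simd reduction(inscan, +: x)
    // the frontend materializes helper copy arrays for the scan algorithm;
    // in simd-only mode they are never referenced, so their captures are
    // skipped below.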
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}
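// Illustrative example (assumption, not part of the original comments): for a
// VLA such as 'double a[n][m]', getTypeSize() below emits the runtime
// computation n * m * sizeof(double), combining the cached VLA dimensions
// with no-unsigned-wrap multiplications.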
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
                  : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress(CGF);
  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}
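// Note on the uintptr convention used below (illustrative): a scalar such as
// 'int i' captured by copy is forwarded to the outlined function as a
// pointer-sized integer. GenerateOpenMPCapturedVars() above stores the value
// through an 'int *' view of an 'i.casted' temporary and reloads it as
// uintptr, because the runtime can only pass pointer-sized arguments.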
namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
        /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. We can pass the VLA type sizes to the outlined
    // function in the same way.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit the function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Always inline the outlined function if optimizations are enabled.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    F->removeFnAttr(llvm::Attribute::NoInline);
    F->addFnAttr(llvm::Attribute::AlwaysInline);
  }

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit the function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress(CGF);
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress(CGF)}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}
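// Note on the debug-wrapper scheme implemented below (illustrative summary):
// when debug info is required, two functions are emitted. The
// "<helper>_debug__" version keeps the original parameter types so captured
// variables remain inspectable in a debugger, while the wrapper with
// uintptr-cast parameters is what the runtime actually invokes; the wrapper
// merely loads its arguments and forwards them to the debug version.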
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first,
                            LocalAddrPair.second.second);
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            LV.getAddress(WrapperCGF),
            PI->getType()->getPointerTo(
                LV.getAddress(WrapperCGF).getAddressSpace()),
            PI->getType()));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                      Arg->getType(), AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
//                          OpenMP Directive Emission
//===----------------------------------------------------------------------===//
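// EmitOMPAggregateAssign() below lowers an array copy to roughly the
// following loop (illustrative pseudocode):
//   if (dest == dest + n) goto done;
// body:
//   CopyGen(destElem, srcElem); ++destElem; ++srcElem;
//   if (destElem != dest + n) goto body;
// done: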
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI, SrcAddr.getElementType(),
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext =
      Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext =
      Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
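// Illustrative example: copying 'S x[10]' where S defines its own 'operator='
// (e.g. for a lastprivate or copyin clause) cannot be lowered to a memcpy;
// EmitOMPCopy() below dispatches to the element-by-element loop above so the
// user-defined assignment runs for every element.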
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element-by-element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, DestElement);
            Remap.addPrivate(SrcVD, SrcElement);
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, SrcAddr);
    Remap.addPrivate(DestVD, DestAddr);
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copies for firstprivate constant variables in target
      // regions, captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
          const Expr *Init = VD->getInit();
          if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
            EmitAggregateAssign(Dest, OriginalLVal, Type);
          } else {
            EmitOMPAggregateAssign(
                Emission.getAllocatedAddress(), OriginalLVal.getAddress(*this),
                Type,
                [this, VDInit, Init](Address DestElement, Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  RunCleanupsScope InitScope(*this);
                  // Emit initialization for single element.
                  setAddrOfLocalVar(VDInit, SrcElement);
                  EmitAnyExprToMem(Init, DestElement,
                                   Init->getType().getQualifiers(),
                                   /*IsInitializer*/ false);
                  LocalDeclMap.erase(VDInit);
                });
          }
          EmitAutoVarCleanups(Emission);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          // Emit private VarDecl with copy init.
          // Remap temp VDInit variable to the address of the original
          // variable (for proper handling of captured global variables).
          setAddrOfLocalVar(VDInit, OriginalAddr);
          EmitDecl(*VD);
          LocalDeclMap.erase(VDInit);
          Address VDAddr = GetAddrOfLocalVar(VD);
          if (ThisFirstprivateIsLastprivate &&
              Lastprivates[OrigVD->getCanonicalDecl()] ==
                  OMPC_LASTPRIVATE_conditional) {
            // Create/init special variable for lastprivate conditionals.
            llvm::Value *V =
                EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl),
                                 (*IRef)->getExprLoc());
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl));
            LocalDeclMap.erase(VD);
            setAddrOfLocalVar(VD, VDAddr);
          }
          IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
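// Illustrative example (not from the original comments): given
//   #pragma omp task firstprivate(a) private(b)
// 'a' gets a private copy initialized from the original variable by
// EmitOMPFirstprivateClause() above, while 'b' is handled by
// EmitOMPPrivateClause() below and gets a default-initialized copy that never
// reads the original.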
void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        EmitDecl(*VD);
        // Emit private VarDecl with copy init.
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}
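// Illustrative example: for a threadprivate variable 'tp',
//   #pragma omp parallel copyin(tp)
// copies the master thread's value of 'tp' into each worker thread's copy on
// entry to the region. The address comparison emitted below skips the copy on
// the master thread itself, where source and destination coincide.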
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr = Address::deprecated(
              VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                  : CGM.GetAddrOfGlobal(VD),
              getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        if (CopiedVars.size() == 1) {
          // First check if the current thread is the master thread. If it is,
          // there is no need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt =
              Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
          auto *PrivateAddrInt =
              Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of the copying procedure for the non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress(*this));
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          Address VDAddr = Address::invalid();
          if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            setAddrOfLocalVar(VD, VDAddr);
          } else {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            VDAddr = GetAddrOfLocalVar(VD);
          }
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
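// Illustrative example: for
//   #pragma omp for lastprivate(a)
// the value 'a' has after the sequentially last iteration must be copied back
// to the original variable. EmitOMPLastprivateClauseFinal() below emits that
// copy, guarded by the IsLastIterCond flag provided by the worksharing loop.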
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit an implicit barrier if at least one lastprivate conditional is
    // found and this is not simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying back to the
        // original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr = Address::deprecated(
              Builder.CreateLoad(PrivateAddr),
              CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()),
              PrivateVD, (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
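// Illustrative example: for
//   #pragma omp parallel for reduction(+: sum)
// each thread receives a private 'sum' initialized below to the identity of
// '+' (zero); the partial results are later combined into the original
// variable by EmitOMPReductionClauseFinal().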
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
      Data.ReductionOps.append(C->reduction_ops().begin(),
                               C->reduction_ops().end());
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    }
  }
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count).getAddress(*this),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered =
        PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr);
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD,
                              RedCG.getSharedLValue(Count).getAddress(*this));
      PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD));
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD,
                              RedCG.getSharedLValue(Count).getAddress(*this));
      PrivateScope.addPrivate(RHSVD, Builder.CreateElementBitCast(
                                         GetAddrOfLocalVar(PrivateVD),
                                         ConvertTypeForMem(RHSVD->getType()),
                                         "rhs.begin"));
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(LHSVD, OriginalAddr);
      PrivateScope.addPrivate(
          RHSVD, IsArray ? Builder.CreateElementBitCast(
                               GetAddrOfLocalVar(PrivateVD),
                               ConvertTypeForMem(RHSVD->getType()),
                               "rhs.begin")
                         : GetAddrOfLocalVar(PrivateVD));
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
  if (!Data.ReductionVars.empty()) {
    Data.IsReductionWithTaskMod = true;
    Data.IsWorksharingReduction =
        isOpenMPWorksharingDirective(D.getDirectiveKind());
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
    const Expr *TaskRedRef = nullptr;
    switch (D.getDirectiveKind()) {
    case OMPD_parallel:
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_for:
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_sections:
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for:
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_master:
      TaskRedRef =
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_sections:
      TaskRedRef =
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel:
      TaskRedRef =
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel_for:
      TaskRedRef =
          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_distribute_parallel_for:
      TaskRedRef =
          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_target_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_simd:
    case OMPD_for_simd:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_parallel_for_simd:
    case OMPD_task:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_ordered:
    case OMPD_atomic:
    case OMPD_teams:
    case OMPD_target:
    case OMPD_cancellation_point:
    case OMPD_cancel:
    case OMPD_target_data:
    case OMPD_target_enter_data:
    case OMPD_target_exit_data:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_distribute:
    case OMPD_target_update:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_distribute_simd:
    case OMPD_target_parallel_for_simd:
    case OMPD_target_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams_distribute_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_declare_simd:
    case OMPD_requires:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive with task reductions.");
    }

    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
    EmitVarDecl(*VD);
    EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
                      /*Volatile=*/false, TaskRedRef->getType());
  }
}
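// Note (illustrative): for the 'task' reduction modifier, e.g.
//   #pragma omp parallel reduction(task, +: x)
// EmitOMPReductionClauseInit() above obtains a task-reduction descriptor from
// the runtime and stores it in the implicit task-reduction variable; the
// matching finalization is emitted in EmitOMPReductionClauseFinal() below.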
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace

static void
checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  // Privates should not be analyzed since they are not captured at all.
  // Task reductions may be skipped - tasks are ignored.
  // Firstprivates do not return a value but may be passed by reference - no
  // need to check for updated lastprivate conditional.
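  // For example, given
  //   #pragma omp parallel for lastprivate(conditional: x)
  //   for (int i = 0; i < n; ++i)
  //     if (cond(i)) x = i;
  // every assignment to 'x' in the region has to be tracked so that the value
  // from the sequentially last iteration that actually assigns 'x' is the one
  // copied back to the original variable (a sketch of the OpenMP 5.0
  // semantics handled here).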
1519 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 1520 for (const Expr *Ref : C->varlists()) { 1521 if (!Ref->getType()->isScalarType()) 1522 continue; 1523 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 1524 if (!DRE) 1525 continue; 1526 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); 1527 } 1528 } 1529 CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional( 1530 CGF, S, PrivateDecls); 1531 } 1532 1533 static void emitCommonOMPParallelDirective( 1534 CodeGenFunction &CGF, const OMPExecutableDirective &S, 1535 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1536 const CodeGenBoundParametersTy &CodeGenBoundParameters) { 1537 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); 1538 llvm::Value *NumThreads = nullptr; 1539 llvm::Function *OutlinedFn = 1540 CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( 1541 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 1542 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { 1543 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 1544 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 1545 /*IgnoreResultAssign=*/true); 1546 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( 1547 CGF, NumThreads, NumThreadsClause->getBeginLoc()); 1548 } 1549 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { 1550 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); 1551 CGF.CGM.getOpenMPRuntime().emitProcBindClause( 1552 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc()); 1553 } 1554 const Expr *IfCond = nullptr; 1555 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 1556 if (C->getNameModifier() == OMPD_unknown || 1557 C->getNameModifier() == OMPD_parallel) { 1558 IfCond = C->getCondition(); 1559 break; 1560 } 1561 } 1562 1563 OMPParallelScope Scope(CGF, S); 1564 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 1565 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk 1566 // lower and upper bounds with the pragma 'for' chunking mechanism. 1567 // The following lambda takes care of appending the lower and upper bound 1568 // parameters when necessary 1569 CodeGenBoundParameters(CGF, S, CapturedVars); 1570 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 1571 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn, 1572 CapturedVars, IfCond, NumThreads); 1573 } 1574 1575 static bool isAllocatableDecl(const VarDecl *VD) { 1576 const VarDecl *CVD = VD->getCanonicalDecl(); 1577 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 1578 return false; 1579 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 1580 // Use the default allocation. 
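  // That is, a declaration that uses the default (or null) allocator and has
  // no allocator expression keeps the normal alloca-based emission and is not
  // treated as allocatable here.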
  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
           !AA->getAllocator());
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
    CodeGenFunction &CGF, const VarDecl *VD) {
  CodeGenModule &CGM = CGF.CGM;
  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!isAllocatableDecl(CVD))
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }

  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);

  llvm::Value *Addr = OMPBuilder.createOMPAlloc(
      CGF.Builder, Size, Allocator,
      getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
  llvm::CallInst *FreeCI =
      OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
  return Address::deprecated(Addr, Align);
}

Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
    CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
    SourceLocation Loc) {
  CodeGenModule &CGM = CGF.CGM;
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Data =
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
  llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
  std::string Suffix = getNameWithSeparators({"cache", ""});
  llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);

  llvm::CallInst *ThreadPrivateCacheCall =
      OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);

  return Address::deprecated(ThreadPrivateCacheCall, VDAddr.getAlignment());
}

std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
    ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str().str();
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // Check if we have any if clause associated with the directive.
    llvm::Value *IfCond = nullptr;
    if (const auto *C = S.getSingleClause<OMPIfClause>())
      IfCond = EmitScalarExpr(C->getCondition(),
                              /*IgnoreResultAssign=*/true);

    llvm::Value *NumThreads = nullptr;
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                  /*IgnoreResultAssign=*/true);

    ProcBindKind ProcBind = OMP_PROC_BIND_default;
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
      ProcBind = ProcBindClause->getProcBindKind();

    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    // The cleanup callback that finalizes all variables at the given location,
    // thus calls destructors etc.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();

    auto BodyGenCB = [ParallelRegionBodyStmt,
                      this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                            llvm::BasicBlock &ContinuationBB) {
      OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP,
                                                      ContinuationBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt,
                                             CodeGenIP, ContinuationBB);
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    Builder.restoreIP(
        OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
                                  IfCond, NumThreads, ProcBind, S.hasCancel()));
    return;
  }

  // Emit parallel region as a standalone region.
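  // Without the OpenMPIRBuilder, the region below is outlined into a separate
  // function and, roughly, a call
  //   __kmpc_fork_call(&<loc>, <n>, <outlined_fn>, <captured vars...>);
  // is emitted (a sketch; the actual runtime entry point and arguments are
  // chosen by CGOpenMPRuntime::emitParallelCall).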
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // propagation of the master thread's values of threadprivate variables
      // to the local instances of those variables in all other implicit
      // threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
  EmitStmt(S.getIfStmt());
}

namespace {
/// RAII to handle scopes for loop transformation directives.
class OMPTransformDirectiveScopeRAII {
  OMPLoopScope *Scope = nullptr;
  CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
  CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;

public:
  OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
    if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
      Scope = new OMPLoopScope(CGF, *Dir);
      CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
      CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
    }
  }
  ~OMPTransformDirectiveScopeRAII() {
    if (!Scope)
      return;
    delete CapInfoRAII;
    delete CGSI;
    delete Scope;
  }
};
} // namespace

static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
                     int MaxLevel, int Level = 0) {
  assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
  const Stmt *SimplifiedS = S->IgnoreContainers();
  if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
    PrettyStackTraceLoc CrashInfo(
        CGF.getContext().getSourceManager(), CS->getLBracLoc(),
        "LLVM IR generation of compound statement ('{}')");

    // Keep track of the current cleanup stack depth, including debug scopes.
1806 CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange()); 1807 for (const Stmt *CurStmt : CS->body()) 1808 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level); 1809 return; 1810 } 1811 if (SimplifiedS == NextLoop) { 1812 if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS)) 1813 SimplifiedS = Dir->getTransformedStmt(); 1814 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS)) 1815 SimplifiedS = CanonLoop->getLoopStmt(); 1816 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) { 1817 S = For->getBody(); 1818 } else { 1819 assert(isa<CXXForRangeStmt>(SimplifiedS) && 1820 "Expected canonical for loop or range-based for loop."); 1821 const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS); 1822 CGF.EmitStmt(CXXFor->getLoopVarStmt()); 1823 S = CXXFor->getBody(); 1824 } 1825 if (Level + 1 < MaxLevel) { 1826 NextLoop = OMPLoopDirective::tryToFindNextInnerLoop( 1827 S, /*TryImperfectlyNestedLoops=*/true); 1828 emitBody(CGF, S, NextLoop, MaxLevel, Level + 1); 1829 return; 1830 } 1831 } 1832 CGF.EmitStmt(S); 1833 } 1834 1835 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, 1836 JumpDest LoopExit) { 1837 RunCleanupsScope BodyScope(*this); 1838 // Update counters values on current iteration. 1839 for (const Expr *UE : D.updates()) 1840 EmitIgnoredExpr(UE); 1841 // Update the linear variables. 1842 // In distribute directives only loop counters may be marked as linear, no 1843 // need to generate the code for them. 1844 if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { 1845 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1846 for (const Expr *UE : C->updates()) 1847 EmitIgnoredExpr(UE); 1848 } 1849 } 1850 1851 // On a continue in the body, jump to the end. 1852 JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue"); 1853 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1854 for (const Expr *E : D.finals_conditions()) { 1855 if (!E) 1856 continue; 1857 // Check that loop counter in non-rectangular nest fits into the iteration 1858 // space. 1859 llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next"); 1860 EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(), 1861 getProfileCount(D.getBody())); 1862 EmitBlock(NextBB); 1863 } 1864 1865 OMPPrivateScope InscanScope(*this); 1866 EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true); 1867 bool IsInscanRegion = InscanScope.Privatize(); 1868 if (IsInscanRegion) { 1869 // Need to remember the block before and after scan directive 1870 // to dispatch them correctly depending on the clause used in 1871 // this directive, inclusive or exclusive. For inclusive scan the natural 1872 // order of the blocks is used, for exclusive clause the blocks must be 1873 // executed in reverse order. 1874 OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb"); 1875 OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb"); 1876 // No need to allocate inscan exit block, in simd mode it is selected in the 1877 // codegen for the scan directive. 1878 if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd) 1879 OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb"); 1880 OMPScanDispatch = createBasicBlock("omp.inscan.dispatch"); 1881 EmitBranch(OMPScanDispatch); 1882 EmitBlock(OMPBeforeScanBlock); 1883 } 1884 1885 // Emit loop variables for C++ range loops. 1886 const Stmt *Body = 1887 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); 1888 // Emit loop body. 
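  // E.g. for '#pragma omp for collapse(2)' the loop headers have already been
  // folded into the iteration-space arithmetic above, so emitBody only emits
  // the innermost body plus any statements between the collapsed loop headers
  // (a sketch of the traversal done by emitBody).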
  emitBody(*this, Body,
           OMPLoopBasedDirective::tryToFindNextInnerLoop(
               Body, /*TryImperfectlyNestedLoops=*/true),
           D.getLoopsNumber());

  // Jump to the dispatcher at the end of the loop body.
  if (IsInscanRegion)
    EmitBranch(OMPScanExitBlock);

  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;

/// Emit a captured statement and return the function as well as its captured
/// closure context.
static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
                                             const CapturedStmt *S) {
  LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
  CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
  std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
      std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
  llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);

  return {F, CapStruct.getPointer(ParentCGF)};
}

/// Emit a call to a previously captured closure.
static llvm::CallInst *
emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
                     llvm::ArrayRef<llvm::Value *> Args) {
  // Append the closure context to the argument list.
  SmallVector<llvm::Value *> EffectiveArgs;
  EffectiveArgs.reserve(Args.size() + 1);
  llvm::append_range(EffectiveArgs, Args);
  EffectiveArgs.push_back(Cap.second);

  return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
}

llvm::CanonicalLoopInfo *
CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
  assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");

  // The caller is processing the loop-associated directive containing the \p
  // Depth loops nested in \p S. Put the previous pending loop-associated
  // directive on the stack. If the current loop-associated directive is a loop
  // transformation directive, it will push its generated loops onto the stack
  // such that together with the loops left here they form the combined loop
  // nest for the parent loop-associated directive.
  int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
  ExpectedOMPLoopDepth = Depth;

  EmitStmt(S);
  assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");

  // The last added loop is the outermost one.
  llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();

  // Pop the \p Depth loops requested by the call from that stack and restore
  // the previous context.
  OMPLoopNestStack.pop_back_n(Depth);
  ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;

  return Result;
}

void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
  const Stmt *SyntacticalLoop = S->getLoopStmt();
  if (!getLangOpts().OpenMPIRBuilder) {
    // Ignore if OpenMPIRBuilder is not enabled.
    EmitStmt(SyntacticalLoop);
    return;
  }

  LexicalScope ForScope(*this, S->getSourceRange());

  // Emit init statements. The Distance/LoopVar funcs may reference variable
  // declarations they contain.
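  // E.g. for 'for (int i = a; i < b; i += c)' the distance function computes
  // the trip count, conceptually (b - a + (c - 1)) / c, and the loop-variable
  // function maps a logical iteration number n back to i = a + n * c (a
  // sketch; the actual expressions are built by Sema and emitted via the
  // closures below).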
1971 const Stmt *BodyStmt; 1972 if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) { 1973 if (const Stmt *InitStmt = For->getInit()) 1974 EmitStmt(InitStmt); 1975 BodyStmt = For->getBody(); 1976 } else if (const auto *RangeFor = 1977 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) { 1978 if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt()) 1979 EmitStmt(RangeStmt); 1980 if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt()) 1981 EmitStmt(BeginStmt); 1982 if (const DeclStmt *EndStmt = RangeFor->getEndStmt()) 1983 EmitStmt(EndStmt); 1984 if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt()) 1985 EmitStmt(LoopVarStmt); 1986 BodyStmt = RangeFor->getBody(); 1987 } else 1988 llvm_unreachable("Expected for-stmt or range-based for-stmt"); 1989 1990 // Emit closure for later use. By-value captures will be captured here. 1991 const CapturedStmt *DistanceFunc = S->getDistanceFunc(); 1992 EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc); 1993 const CapturedStmt *LoopVarFunc = S->getLoopVarFunc(); 1994 EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc); 1995 1996 // Call the distance function to get the number of iterations of the loop to 1997 // come. 1998 QualType LogicalTy = DistanceFunc->getCapturedDecl() 1999 ->getParam(0) 2000 ->getType() 2001 .getNonReferenceType(); 2002 Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr"); 2003 emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()}); 2004 llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count"); 2005 2006 // Emit the loop structure. 2007 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 2008 auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, 2009 llvm::Value *IndVar) { 2010 Builder.restoreIP(CodeGenIP); 2011 2012 // Emit the loop body: Convert the logical iteration number to the loop 2013 // variable and emit the body. 2014 const DeclRefExpr *LoopVarRef = S->getLoopVarRef(); 2015 LValue LCVal = EmitLValue(LoopVarRef); 2016 Address LoopVarAddress = LCVal.getAddress(*this); 2017 emitCapturedStmtCall(*this, LoopVarClosure, 2018 {LoopVarAddress.getPointer(), IndVar}); 2019 2020 RunCleanupsScope BodyScope(*this); 2021 EmitStmt(BodyStmt); 2022 }; 2023 llvm::CanonicalLoopInfo *CL = 2024 OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal); 2025 2026 // Finish up the loop. 2027 Builder.restoreIP(CL->getAfterIP()); 2028 ForScope.ForceCleanup(); 2029 2030 // Remember the CanonicalLoopInfo for parent AST nodes consuming it. 2031 OMPLoopNestStack.push_back(CL); 2032 } 2033 2034 void CodeGenFunction::EmitOMPInnerLoop( 2035 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond, 2036 const Expr *IncExpr, 2037 const llvm::function_ref<void(CodeGenFunction &)> BodyGen, 2038 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { 2039 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); 2040 2041 // Start the loop with a block that tests the condition. 2042 auto CondBlock = createBasicBlock("omp.inner.for.cond"); 2043 EmitBlock(CondBlock); 2044 const SourceRange R = S.getSourceRange(); 2045 2046 // If attributes are attached, push to the basic block with them. 
2047 const auto &OMPED = cast<OMPExecutableDirective>(S); 2048 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt(); 2049 const Stmt *SS = ICS->getCapturedStmt(); 2050 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS); 2051 OMPLoopNestStack.clear(); 2052 if (AS) 2053 LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), 2054 AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()), 2055 SourceLocToDebugLoc(R.getEnd())); 2056 else 2057 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 2058 SourceLocToDebugLoc(R.getEnd())); 2059 2060 // If there are any cleanups between here and the loop-exit scope, 2061 // create a block to stage a loop exit along. 2062 llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); 2063 if (RequiresCleanup) 2064 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); 2065 2066 llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body"); 2067 2068 // Emit condition. 2069 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); 2070 if (ExitBlock != LoopExit.getBlock()) { 2071 EmitBlock(ExitBlock); 2072 EmitBranchThroughCleanup(LoopExit); 2073 } 2074 2075 EmitBlock(LoopBody); 2076 incrementProfileCounter(&S); 2077 2078 // Create a block for the increment. 2079 JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); 2080 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 2081 2082 BodyGen(*this); 2083 2084 // Emit "IV = IV + 1" and a back-edge to the condition block. 2085 EmitBlock(Continue.getBlock()); 2086 EmitIgnoredExpr(IncExpr); 2087 PostIncGen(*this); 2088 BreakContinueStack.pop_back(); 2089 EmitBranch(CondBlock); 2090 LoopStack.pop(); 2091 // Emit the fall-through block. 2092 EmitBlock(LoopExit.getBlock()); 2093 } 2094 2095 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { 2096 if (!HaveInsertPoint()) 2097 return false; 2098 // Emit inits for the linear variables. 2099 bool HasLinears = false; 2100 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 2101 for (const Expr *Init : C->inits()) { 2102 HasLinears = true; 2103 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); 2104 if (const auto *Ref = 2105 dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { 2106 AutoVarEmission Emission = EmitAutoVarAlloca(*VD); 2107 const auto *OrigVD = cast<VarDecl>(Ref->getDecl()); 2108 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), 2109 CapturedStmtInfo->lookup(OrigVD) != nullptr, 2110 VD->getInit()->getType(), VK_LValue, 2111 VD->getInit()->getExprLoc()); 2112 EmitExprAsInit( 2113 &DRE, VD, 2114 MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()), 2115 /*capturedByInit=*/false); 2116 EmitAutoVarCleanups(Emission); 2117 } else { 2118 EmitVarDecl(*VD); 2119 } 2120 } 2121 // Emit the linear steps for the linear clauses. 2122 // If a step is not constant, it is pre-calculated before the loop. 2123 if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep())) 2124 if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) { 2125 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); 2126 // Emit calculation of the linear step. 2127 EmitIgnoredExpr(CS); 2128 } 2129 } 2130 return HasLinears; 2131 } 2132 2133 void CodeGenFunction::EmitOMPLinearClauseFinal( 2134 const OMPLoopDirective &D, 2135 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { 2136 if (!HaveInsertPoint()) 2137 return; 2138 llvm::BasicBlock *DoneBB = nullptr; 2139 // Emit the final values of the linear variables. 
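  // E.g. for 'linear(x: step)' the final value is, conceptually,
  //   x = x.init + <number of iterations> * step;
  // the finals() expressions emitted below store that value back into the
  // original variable (a sketch; the exact expressions come from Sema).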
2140 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 2141 auto IC = C->varlist_begin(); 2142 for (const Expr *F : C->finals()) { 2143 if (!DoneBB) { 2144 if (llvm::Value *Cond = CondGen(*this)) { 2145 // If the first post-update expression is found, emit conditional 2146 // block if it was requested. 2147 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu"); 2148 DoneBB = createBasicBlock(".omp.linear.pu.done"); 2149 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 2150 EmitBlock(ThenBB); 2151 } 2152 } 2153 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); 2154 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), 2155 CapturedStmtInfo->lookup(OrigVD) != nullptr, 2156 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); 2157 Address OrigAddr = EmitLValue(&DRE).getAddress(*this); 2158 CodeGenFunction::OMPPrivateScope VarScope(*this); 2159 VarScope.addPrivate(OrigVD, OrigAddr); 2160 (void)VarScope.Privatize(); 2161 EmitIgnoredExpr(F); 2162 ++IC; 2163 } 2164 if (const Expr *PostUpdate = C->getPostUpdateExpr()) 2165 EmitIgnoredExpr(PostUpdate); 2166 } 2167 if (DoneBB) 2168 EmitBlock(DoneBB, /*IsFinished=*/true); 2169 } 2170 2171 static void emitAlignedClause(CodeGenFunction &CGF, 2172 const OMPExecutableDirective &D) { 2173 if (!CGF.HaveInsertPoint()) 2174 return; 2175 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { 2176 llvm::APInt ClauseAlignment(64, 0); 2177 if (const Expr *AlignmentExpr = Clause->getAlignment()) { 2178 auto *AlignmentCI = 2179 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); 2180 ClauseAlignment = AlignmentCI->getValue(); 2181 } 2182 for (const Expr *E : Clause->varlists()) { 2183 llvm::APInt Alignment(ClauseAlignment); 2184 if (Alignment == 0) { 2185 // OpenMP [2.8.1, Description] 2186 // If no optional parameter is specified, implementation-defined default 2187 // alignments for SIMD instructions on the target platforms are assumed. 2188 Alignment = 2189 CGF.getContext() 2190 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( 2191 E->getType()->getPointeeType())) 2192 .getQuantity(); 2193 } 2194 assert((Alignment == 0 || Alignment.isPowerOf2()) && 2195 "alignment is not power of 2"); 2196 if (Alignment != 0) { 2197 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 2198 CGF.emitAlignmentAssumption( 2199 PtrValue, E, /*No second loc needed*/ SourceLocation(), 2200 llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment)); 2201 } 2202 } 2203 } 2204 } 2205 2206 void CodeGenFunction::EmitOMPPrivateLoopCounters( 2207 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { 2208 if (!HaveInsertPoint()) 2209 return; 2210 auto I = S.private_counters().begin(); 2211 for (const Expr *E : S.counters()) { 2212 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2213 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 2214 // Emit var without initialization. 
2215 AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD); 2216 EmitAutoVarCleanups(VarEmission); 2217 LocalDeclMap.erase(PrivateVD); 2218 (void)LoopScope.addPrivate(VD, VarEmission.getAllocatedAddress()); 2219 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || 2220 VD->hasGlobalStorage()) { 2221 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), 2222 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), 2223 E->getType(), VK_LValue, E->getExprLoc()); 2224 (void)LoopScope.addPrivate(PrivateVD, EmitLValue(&DRE).getAddress(*this)); 2225 } else { 2226 (void)LoopScope.addPrivate(PrivateVD, VarEmission.getAllocatedAddress()); 2227 } 2228 ++I; 2229 } 2230 // Privatize extra loop counters used in loops for ordered(n) clauses. 2231 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) { 2232 if (!C->getNumForLoops()) 2233 continue; 2234 for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size(); 2235 I < E; ++I) { 2236 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I)); 2237 const auto *VD = cast<VarDecl>(DRE->getDecl()); 2238 // Override only those variables that can be captured to avoid re-emission 2239 // of the variables declared within the loops. 2240 if (DRE->refersToEnclosingVariableOrCapture()) { 2241 (void)LoopScope.addPrivate( 2242 VD, CreateMemTemp(DRE->getType(), VD->getName())); 2243 } 2244 } 2245 } 2246 } 2247 2248 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 2249 const Expr *Cond, llvm::BasicBlock *TrueBlock, 2250 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 2251 if (!CGF.HaveInsertPoint()) 2252 return; 2253 { 2254 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 2255 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); 2256 (void)PreCondScope.Privatize(); 2257 // Get initial values of real counters. 2258 for (const Expr *I : S.inits()) { 2259 CGF.EmitIgnoredExpr(I); 2260 } 2261 } 2262 // Create temp loop control variables with their init values to support 2263 // non-rectangular loops. 2264 CodeGenFunction::OMPMapVars PreCondVars; 2265 for (const Expr *E : S.dependent_counters()) { 2266 if (!E) 2267 continue; 2268 assert(!E->getType().getNonReferenceType()->isRecordType() && 2269 "dependent counter must not be an iterator."); 2270 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2271 Address CounterAddr = 2272 CGF.CreateMemTemp(VD->getType().getNonReferenceType()); 2273 (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr); 2274 } 2275 (void)PreCondVars.apply(CGF); 2276 for (const Expr *E : S.dependent_inits()) { 2277 if (!E) 2278 continue; 2279 CGF.EmitIgnoredExpr(E); 2280 } 2281 // Check that loop is executed at least one time. 
2282 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); 2283 PreCondVars.restore(CGF); 2284 } 2285 2286 void CodeGenFunction::EmitOMPLinearClause( 2287 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { 2288 if (!HaveInsertPoint()) 2289 return; 2290 llvm::DenseSet<const VarDecl *> SIMDLCVs; 2291 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 2292 const auto *LoopDirective = cast<OMPLoopDirective>(&D); 2293 for (const Expr *C : LoopDirective->counters()) { 2294 SIMDLCVs.insert( 2295 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 2296 } 2297 } 2298 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 2299 auto CurPrivate = C->privates().begin(); 2300 for (const Expr *E : C->varlists()) { 2301 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2302 const auto *PrivateVD = 2303 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); 2304 if (!SIMDLCVs.count(VD->getCanonicalDecl())) { 2305 // Emit private VarDecl with copy init. 2306 EmitVarDecl(*PrivateVD); 2307 bool IsRegistered = 2308 PrivateScope.addPrivate(VD, GetAddrOfLocalVar(PrivateVD)); 2309 assert(IsRegistered && "linear var already registered as private"); 2310 // Silence the warning about unused variable. 2311 (void)IsRegistered; 2312 } else { 2313 EmitVarDecl(*PrivateVD); 2314 } 2315 ++CurPrivate; 2316 } 2317 } 2318 } 2319 2320 static void emitSimdlenSafelenClause(CodeGenFunction &CGF, 2321 const OMPExecutableDirective &D) { 2322 if (!CGF.HaveInsertPoint()) 2323 return; 2324 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { 2325 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 2326 /*ignoreResult=*/true); 2327 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 2328 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 2329 // In presence of finite 'safelen', it may be unsafe to mark all 2330 // the memory instructions parallel, because loop-carried 2331 // dependences of 'safelen' iterations are possible. 2332 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); 2333 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { 2334 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), 2335 /*ignoreResult=*/true); 2336 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 2337 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 2338 // In presence of finite 'safelen', it may be unsafe to mark all 2339 // the memory instructions parallel, because loop-carried 2340 // dependences of 'safelen' iterations are possible. 2341 CGF.LoopStack.setParallel(/*Enable=*/false); 2342 } 2343 } 2344 2345 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) { 2346 // Walk clauses and process safelen/lastprivate. 2347 LoopStack.setParallel(/*Enable=*/true); 2348 LoopStack.setVectorizeEnable(); 2349 emitSimdlenSafelenClause(*this, D); 2350 if (const auto *C = D.getSingleClause<OMPOrderClause>()) 2351 if (C->getKind() == OMPC_ORDER_concurrent) 2352 LoopStack.setParallel(/*Enable=*/true); 2353 if ((D.getDirectiveKind() == OMPD_simd || 2354 (getLangOpts().OpenMPSimd && 2355 isOpenMPSimdDirective(D.getDirectiveKind()))) && 2356 llvm::any_of(D.getClausesOfKind<OMPReductionClause>(), 2357 [](const OMPReductionClause *C) { 2358 return C->getModifier() == OMPC_REDUCTION_inscan; 2359 })) 2360 // Disable parallel access in case of prefix sum. 
2361 LoopStack.setParallel(/*Enable=*/false); 2362 } 2363 2364 void CodeGenFunction::EmitOMPSimdFinal( 2365 const OMPLoopDirective &D, 2366 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { 2367 if (!HaveInsertPoint()) 2368 return; 2369 llvm::BasicBlock *DoneBB = nullptr; 2370 auto IC = D.counters().begin(); 2371 auto IPC = D.private_counters().begin(); 2372 for (const Expr *F : D.finals()) { 2373 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); 2374 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl()); 2375 const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD); 2376 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) || 2377 OrigVD->hasGlobalStorage() || CED) { 2378 if (!DoneBB) { 2379 if (llvm::Value *Cond = CondGen(*this)) { 2380 // If the first post-update expression is found, emit conditional 2381 // block if it was requested. 2382 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then"); 2383 DoneBB = createBasicBlock(".omp.final.done"); 2384 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 2385 EmitBlock(ThenBB); 2386 } 2387 } 2388 Address OrigAddr = Address::invalid(); 2389 if (CED) { 2390 OrigAddr = 2391 EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this); 2392 } else { 2393 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD), 2394 /*RefersToEnclosingVariableOrCapture=*/false, 2395 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); 2396 OrigAddr = EmitLValue(&DRE).getAddress(*this); 2397 } 2398 OMPPrivateScope VarScope(*this); 2399 VarScope.addPrivate(OrigVD, OrigAddr); 2400 (void)VarScope.Privatize(); 2401 EmitIgnoredExpr(F); 2402 } 2403 ++IC; 2404 ++IPC; 2405 } 2406 if (DoneBB) 2407 EmitBlock(DoneBB, /*IsFinished=*/true); 2408 } 2409 2410 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, 2411 const OMPLoopDirective &S, 2412 CodeGenFunction::JumpDest LoopExit) { 2413 CGF.EmitOMPLoopBody(S, LoopExit); 2414 CGF.EmitStopPoint(&S); 2415 } 2416 2417 /// Emit a helper variable and return corresponding lvalue. 
2418 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 2419 const DeclRefExpr *Helper) { 2420 auto VDecl = cast<VarDecl>(Helper->getDecl()); 2421 CGF.EmitVarDecl(*VDecl); 2422 return CGF.EmitLValue(Helper); 2423 } 2424 2425 static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S, 2426 const RegionCodeGenTy &SimdInitGen, 2427 const RegionCodeGenTy &BodyCodeGen) { 2428 auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF, 2429 PrePostActionTy &) { 2430 CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S); 2431 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 2432 SimdInitGen(CGF); 2433 2434 BodyCodeGen(CGF); 2435 }; 2436 auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 2437 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 2438 CGF.LoopStack.setVectorizeEnable(/*Enable=*/false); 2439 2440 BodyCodeGen(CGF); 2441 }; 2442 const Expr *IfCond = nullptr; 2443 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2444 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2445 if (CGF.getLangOpts().OpenMP >= 50 && 2446 (C->getNameModifier() == OMPD_unknown || 2447 C->getNameModifier() == OMPD_simd)) { 2448 IfCond = C->getCondition(); 2449 break; 2450 } 2451 } 2452 } 2453 if (IfCond) { 2454 CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2455 } else { 2456 RegionCodeGenTy ThenRCG(ThenGen); 2457 ThenRCG(CGF); 2458 } 2459 } 2460 2461 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, 2462 PrePostActionTy &Action) { 2463 Action.Enter(CGF); 2464 assert(isOpenMPSimdDirective(S.getDirectiveKind()) && 2465 "Expected simd directive"); 2466 OMPLoopScope PreInitScope(CGF, S); 2467 // if (PreCond) { 2468 // for (IV in 0..LastIteration) BODY; 2469 // <Final counter/linear vars updates>; 2470 // } 2471 // 2472 if (isOpenMPDistributeDirective(S.getDirectiveKind()) || 2473 isOpenMPWorksharingDirective(S.getDirectiveKind()) || 2474 isOpenMPTaskLoopDirective(S.getDirectiveKind())) { 2475 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable())); 2476 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable())); 2477 } 2478 2479 // Emit: if (PreCond) - begin. 2480 // If the condition constant folds and can be elided, avoid emitting the 2481 // whole loop. 2482 bool CondConstant; 2483 llvm::BasicBlock *ContBlock = nullptr; 2484 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2485 if (!CondConstant) 2486 return; 2487 } else { 2488 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then"); 2489 ContBlock = CGF.createBasicBlock("simd.if.end"); 2490 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 2491 CGF.getProfileCount(&S)); 2492 CGF.EmitBlock(ThenBlock); 2493 CGF.incrementProfileCounter(&S); 2494 } 2495 2496 // Emit the loop iteration variable. 2497 const Expr *IVExpr = S.getIterationVariable(); 2498 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 2499 CGF.EmitVarDecl(*IVDecl); 2500 CGF.EmitIgnoredExpr(S.getInit()); 2501 2502 // Emit the iterations count variable. 2503 // If it is not a variable, Sema decided to calculate iterations count on 2504 // each iteration (e.g., it is foldable into a constant). 2505 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2506 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2507 // Emit calculation of the iterations count. 
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }

  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
        CGF, S, CGF.EmitLValue(S.getIterationVariable()));
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    emitCommonSimdLoop(
        CGF, S,
        [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPSimdInit(S);
        },
        [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPInnerLoop(
              S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
              [&S](CodeGenFunction &CGF) {
                emitOMPLoopBodyWithStopPoint(CGF, S,
                                             CodeGenFunction::JumpDest());
              },
              [](CodeGenFunction &) {});
        });
    CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(CGF, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}

static bool isSupportedByOpenMPIRBuilder(const OMPExecutableDirective &S) {
  // Check for unsupported clauses.
  if (!S.clauses().empty()) {
    // Currently no clause is supported.
    return false;
  }

  // Check if we have a statement with the ordered directive.
  // Visit the statement hierarchy to find a compound statement
  // with an ordered directive in it.
  if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) {
    if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
      for (const Stmt *SubStmt : SyntacticalLoop->children()) {
        if (!SubStmt)
          continue;
        if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) {
          for (const Stmt *CSSubStmt : CS->children()) {
            if (!CSSubStmt)
              continue;
            if (isa<OMPOrderedDirective>(CSSubStmt)) {
              return false;
            }
          }
        }
      }
    }
  }
  return true;
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  bool UseOMPIRBuilder =
      CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
  if (UseOMPIRBuilder) {
    auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
      // Use the OpenMPIRBuilder if enabled.
      if (UseOMPIRBuilder) {
        // Emit the associated statement and get its loop representation.
2595 llvm::DebugLoc DL = SourceLocToDebugLoc(S.getBeginLoc()); 2596 const Stmt *Inner = S.getRawStmt(); 2597 llvm::CanonicalLoopInfo *CLI = 2598 EmitOMPCollapsedCanonicalLoopNest(Inner, 1); 2599 2600 llvm::OpenMPIRBuilder &OMPBuilder = 2601 CGM.getOpenMPRuntime().getOMPBuilder(); 2602 // Add SIMD specific metadata 2603 OMPBuilder.applySimd(DL, CLI); 2604 return; 2605 } 2606 }; 2607 { 2608 auto LPCRegion = 2609 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 2610 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2611 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, 2612 CodeGenIRBuilder); 2613 } 2614 return; 2615 } 2616 2617 ParentLoopDirectiveForScanRegion ScanRegion(*this, S); 2618 OMPFirstScanLoop = true; 2619 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2620 emitOMPSimdRegion(CGF, S, Action); 2621 }; 2622 { 2623 auto LPCRegion = 2624 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 2625 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2626 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2627 } 2628 // Check for outer lastprivate conditional update. 2629 checkForLastprivateConditionalUpdate(*this, S); 2630 } 2631 2632 void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) { 2633 // Emit the de-sugared statement. 2634 OMPTransformDirectiveScopeRAII TileScope(*this, &S); 2635 EmitStmt(S.getTransformedStmt()); 2636 } 2637 2638 void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { 2639 bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder; 2640 2641 if (UseOMPIRBuilder) { 2642 auto DL = SourceLocToDebugLoc(S.getBeginLoc()); 2643 const Stmt *Inner = S.getRawStmt(); 2644 2645 // Consume nested loop. Clear the entire remaining loop stack because a 2646 // fully unrolled loop is non-transformable. For partial unrolling the 2647 // generated outer loop is pushed back to the stack. 2648 llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1); 2649 OMPLoopNestStack.clear(); 2650 2651 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 2652 2653 bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1; 2654 llvm::CanonicalLoopInfo *UnrolledCLI = nullptr; 2655 2656 if (S.hasClausesOfKind<OMPFullClause>()) { 2657 assert(ExpectedOMPLoopDepth == 0); 2658 OMPBuilder.unrollLoopFull(DL, CLI); 2659 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) { 2660 uint64_t Factor = 0; 2661 if (Expr *FactorExpr = PartialClause->getFactor()) { 2662 Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue(); 2663 assert(Factor >= 1 && "Only positive factors are valid"); 2664 } 2665 OMPBuilder.unrollLoopPartial(DL, CLI, Factor, 2666 NeedsUnrolledCLI ? &UnrolledCLI : nullptr); 2667 } else { 2668 OMPBuilder.unrollLoopHeuristic(DL, CLI); 2669 } 2670 2671 assert((!NeedsUnrolledCLI || UnrolledCLI) && 2672 "NeedsUnrolledCLI implies UnrolledCLI to be set"); 2673 if (UnrolledCLI) 2674 OMPLoopNestStack.push_back(UnrolledCLI); 2675 2676 return; 2677 } 2678 2679 // This function is only called if the unrolled loop is not consumed by any 2680 // other loop-associated construct. Such a loop-associated construct will have 2681 // used the transformed AST. 2682 2683 // Set the unroll metadata for the next emitted loop. 
2684 LoopStack.setUnrollState(LoopAttributes::Enable); 2685 2686 if (S.hasClausesOfKind<OMPFullClause>()) { 2687 LoopStack.setUnrollState(LoopAttributes::Full); 2688 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) { 2689 if (Expr *FactorExpr = PartialClause->getFactor()) { 2690 uint64_t Factor = 2691 FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue(); 2692 assert(Factor >= 1 && "Only positive factors are valid"); 2693 LoopStack.setUnrollCount(Factor); 2694 } 2695 } 2696 2697 EmitStmt(S.getAssociatedStmt()); 2698 } 2699 2700 void CodeGenFunction::EmitOMPOuterLoop( 2701 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 2702 CodeGenFunction::OMPPrivateScope &LoopScope, 2703 const CodeGenFunction::OMPLoopArguments &LoopArgs, 2704 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 2705 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { 2706 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2707 2708 const Expr *IVExpr = S.getIterationVariable(); 2709 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2710 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2711 2712 JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 2713 2714 // Start the loop with a block that tests the condition. 2715 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond"); 2716 EmitBlock(CondBlock); 2717 const SourceRange R = S.getSourceRange(); 2718 OMPLoopNestStack.clear(); 2719 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 2720 SourceLocToDebugLoc(R.getEnd())); 2721 2722 llvm::Value *BoolCondVal = nullptr; 2723 if (!DynamicOrOrdered) { 2724 // UB = min(UB, GlobalUB) or 2725 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 2726 // 'distribute parallel for') 2727 EmitIgnoredExpr(LoopArgs.EUB); 2728 // IV = LB 2729 EmitIgnoredExpr(LoopArgs.Init); 2730 // IV < UB 2731 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); 2732 } else { 2733 BoolCondVal = 2734 RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL, 2735 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); 2736 } 2737 2738 // If there are any cleanups between here and the loop-exit scope, 2739 // create a block to stage a loop exit along. 2740 llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); 2741 if (LoopScope.requiresCleanups()) 2742 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 2743 2744 llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body"); 2745 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 2746 if (ExitBlock != LoopExit.getBlock()) { 2747 EmitBlock(ExitBlock); 2748 EmitBranchThroughCleanup(LoopExit); 2749 } 2750 EmitBlock(LoopBody); 2751 2752 // Emit "IV = LB" (in case of static schedule, we have already calculated new 2753 // LB for loop condition and emitted it above). 2754 if (DynamicOrOrdered) 2755 EmitIgnoredExpr(LoopArgs.Init); 2756 2757 // Create a block for the increment. 2758 JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 2759 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 2760 2761 emitCommonSimdLoop( 2762 *this, S, 2763 [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { 2764 // Generate !llvm.loop.parallel metadata for loads and stores for loops 2765 // with dynamic/guided scheduling and without ordered clause. 
2766 if (!isOpenMPSimdDirective(S.getDirectiveKind())) { 2767 CGF.LoopStack.setParallel(!IsMonotonic); 2768 if (const auto *C = S.getSingleClause<OMPOrderClause>()) 2769 if (C->getKind() == OMPC_ORDER_concurrent) 2770 CGF.LoopStack.setParallel(/*Enable=*/true); 2771 } else { 2772 CGF.EmitOMPSimdInit(S); 2773 } 2774 }, 2775 [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, 2776 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 2777 SourceLocation Loc = S.getBeginLoc(); 2778 // when 'distribute' is not combined with a 'for': 2779 // while (idx <= UB) { BODY; ++idx; } 2780 // when 'distribute' is combined with a 'for' 2781 // (e.g. 'distribute parallel for') 2782 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 2783 CGF.EmitOMPInnerLoop( 2784 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, 2785 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 2786 CodeGenLoop(CGF, S, LoopExit); 2787 }, 2788 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { 2789 CodeGenOrdered(CGF, Loc, IVSize, IVSigned); 2790 }); 2791 }); 2792 2793 EmitBlock(Continue.getBlock()); 2794 BreakContinueStack.pop_back(); 2795 if (!DynamicOrOrdered) { 2796 // Emit "LB = LB + Stride", "UB = UB + Stride". 2797 EmitIgnoredExpr(LoopArgs.NextLB); 2798 EmitIgnoredExpr(LoopArgs.NextUB); 2799 } 2800 2801 EmitBranch(CondBlock); 2802 OMPLoopNestStack.clear(); 2803 LoopStack.pop(); 2804 // Emit the fall-through block. 2805 EmitBlock(LoopExit.getBlock()); 2806 2807 // Tell the runtime we are done. 2808 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { 2809 if (!DynamicOrOrdered) 2810 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 2811 S.getDirectiveKind()); 2812 }; 2813 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2814 } 2815 2816 void CodeGenFunction::EmitOMPForOuterLoop( 2817 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 2818 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 2819 const OMPLoopArguments &LoopArgs, 2820 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 2821 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2822 2823 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 2824 const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule); 2825 2826 assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule, 2827 LoopArgs.Chunk != nullptr)) && 2828 "static non-chunked schedule does not need outer loop"); 2829 2830 // Emit outer loop. 2831 // 2832 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2833 // When schedule(dynamic,chunk_size) is specified, the iterations are 2834 // distributed to threads in the team in chunks as the threads request them. 2835 // Each thread executes a chunk of iterations, then requests another chunk, 2836 // until no chunks remain to be distributed. Each chunk contains chunk_size 2837 // iterations, except for the last chunk to be distributed, which may have 2838 // fewer iterations. When no chunk_size is specified, it defaults to 1. 2839 // 2840 // When schedule(guided,chunk_size) is specified, the iterations are assigned 2841 // to threads in the team in chunks as the executing threads request them. 2842 // Each thread executes a chunk of iterations, then requests another chunk, 2843 // until no chunks remain to be assigned. 
For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the threads
  // in the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
        CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
                                                              LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DispatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);
  }

  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}

static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}

void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as EmitOMPForOuterLoop, except that the schedule cannot be
  // dynamic.
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);

  // For combined 'distribute' and 'for' directives the increment expression
  // of distribute is stored in DistInc. For 'distribute' alone, it is in Inc.
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();

  // This routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive.
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}

static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute' chunk lower and upper
  // bounds rather than the whole loop iteration space. These are parameters
  // to the outlined function for 'parallel' and we copy the bounds of the
  // previous schedule into the current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevUpperBoundVariable()->getExprLoc());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}

/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided), then
/// we need to use the LB and UB expressions generated by the worksharing
/// code generation support, whereas in non-combined situations we would
/// just emit 0 and the LastIteration expression.
/// This function is necessary because the LB and UB types differ between
/// the RT emission routines for 'for_static_init' and 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // When implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop is not
  // normalized, as each team only executes its own assigned distribute chunk.
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal =
      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  llvm::Value *UBVal =
      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  return {LBVal, UBVal};
}

static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  llvm::Value *LBCast =
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
                                CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));

  llvm::Value *UBCast =
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
                                CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}

static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &Action) {
    Action.Enter(CGF);
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S,
      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}

void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  // Emit SPMD target simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace

bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
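  // (This is the artificial iteration variable created by Sema for the
  // normalized loop space, not the source-level loop counter; the user's
  // counters are privatized separately via EmitOMPPrivateLoopCounters.)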
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iteration-count variable.
  // If it is not a variable, Sema decided to compute the iteration count on
  // each use (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit the calculation of the iteration count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    bool Ordered = false;
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
      else
        Ordered = true;
    }

    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
          *this, S, EmitLValue(S.getIterationVariable()));
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the loop schedule kind and chunk.
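      // E.g., '#pragma omp for schedule(static, 4)' yields
      // OMPC_SCHEDULE_static with chunk expression '4'; without a schedule
      // clause, the default schedule and chunk are queried from the runtime
      // below.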
3244 const Expr *ChunkExpr = nullptr; 3245 OpenMPScheduleTy ScheduleKind; 3246 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { 3247 ScheduleKind.Schedule = C->getScheduleKind(); 3248 ScheduleKind.M1 = C->getFirstScheduleModifier(); 3249 ScheduleKind.M2 = C->getSecondScheduleModifier(); 3250 ChunkExpr = C->getChunkSize(); 3251 } else { 3252 // Default behaviour for schedule clause. 3253 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk( 3254 *this, S, ScheduleKind.Schedule, ChunkExpr); 3255 } 3256 bool HasChunkSizeOne = false; 3257 llvm::Value *Chunk = nullptr; 3258 if (ChunkExpr) { 3259 Chunk = EmitScalarExpr(ChunkExpr); 3260 Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(), 3261 S.getIterationVariable()->getType(), 3262 S.getBeginLoc()); 3263 Expr::EvalResult Result; 3264 if (ChunkExpr->EvaluateAsInt(Result, getContext())) { 3265 llvm::APSInt EvaluatedChunk = Result.Val.getInt(); 3266 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1); 3267 } 3268 } 3269 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 3270 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 3271 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 3272 // If the static schedule kind is specified or if the ordered clause is 3273 // specified, and if no monotonic modifier is specified, the effect will 3274 // be as if the monotonic modifier was specified. 3275 bool StaticChunkedOne = 3276 RT.isStaticChunked(ScheduleKind.Schedule, 3277 /* Chunked */ Chunk != nullptr) && 3278 HasChunkSizeOne && 3279 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); 3280 bool IsMonotonic = 3281 Ordered || 3282 (ScheduleKind.Schedule == OMPC_SCHEDULE_static && 3283 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || 3284 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || 3285 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 3286 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 3287 if ((RT.isStaticNonchunked(ScheduleKind.Schedule, 3288 /* Chunked */ Chunk != nullptr) || 3289 StaticChunkedOne) && 3290 !Ordered) { 3291 JumpDest LoopExit = 3292 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 3293 emitCommonSimdLoop( 3294 *this, S, 3295 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3296 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 3297 CGF.EmitOMPSimdInit(S); 3298 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { 3299 if (C->getKind() == OMPC_ORDER_concurrent) 3300 CGF.LoopStack.setParallel(/*Enable=*/true); 3301 } 3302 }, 3303 [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk, 3304 &S, ScheduleKind, LoopExit, 3305 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 3306 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 3307 // When no chunk_size is specified, the iteration space is divided 3308 // into chunks that are approximately equal in size, and at most 3309 // one chunk is distributed to each thread. Note that the size of 3310 // the chunks is unspecified in this case. 3311 CGOpenMPRuntime::StaticRTInput StaticInit( 3312 IVSize, IVSigned, Ordered, IL.getAddress(CGF), 3313 LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF), 3314 StaticChunkedOne ? 
Chunk : nullptr); 3315 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 3316 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, 3317 StaticInit); 3318 // UB = min(UB, GlobalUB); 3319 if (!StaticChunkedOne) 3320 CGF.EmitIgnoredExpr(S.getEnsureUpperBound()); 3321 // IV = LB; 3322 CGF.EmitIgnoredExpr(S.getInit()); 3323 // For unchunked static schedule generate: 3324 // 3325 // while (idx <= UB) { 3326 // BODY; 3327 // ++idx; 3328 // } 3329 // 3330 // For static schedule with chunk one: 3331 // 3332 // while (IV <= PrevUB) { 3333 // BODY; 3334 // IV += ST; 3335 // } 3336 CGF.EmitOMPInnerLoop( 3337 S, LoopScope.requiresCleanups(), 3338 StaticChunkedOne ? S.getCombinedParForInDistCond() 3339 : S.getCond(), 3340 StaticChunkedOne ? S.getDistInc() : S.getInc(), 3341 [&S, LoopExit](CodeGenFunction &CGF) { 3342 emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit); 3343 }, 3344 [](CodeGenFunction &) {}); 3345 }); 3346 EmitBlock(LoopExit.getBlock()); 3347 // Tell the runtime we are done. 3348 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 3349 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 3350 S.getDirectiveKind()); 3351 }; 3352 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 3353 } else { 3354 // Emit the outer loop, which requests its work chunk [LB..UB] from 3355 // runtime and runs the inner loop to process it. 3356 const OMPLoopArguments LoopArguments( 3357 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), 3358 IL.getAddress(*this), Chunk, EUB); 3359 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 3360 LoopArguments, CGDispatchBounds); 3361 } 3362 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 3363 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { 3364 return CGF.Builder.CreateIsNotNull( 3365 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 3366 }); 3367 } 3368 EmitOMPReductionClauseFinal( 3369 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 3370 ? /*Parallel and Simd*/ OMPD_parallel_for_simd 3371 : /*Parallel only*/ OMPD_parallel); 3372 // Emit post-update of the reduction variables if IsLastIter != 0. 3373 emitPostUpdateForReductionClause( 3374 *this, S, [IL, &S](CodeGenFunction &CGF) { 3375 return CGF.Builder.CreateIsNotNull( 3376 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 3377 }); 3378 // Emit final copy of the lastprivate variables if IsLastIter != 0. 3379 if (HasLastprivateClause) 3380 EmitOMPLastprivateClauseFinal( 3381 S, isOpenMPSimdDirective(S.getDirectiveKind()), 3382 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); 3383 } 3384 EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) { 3385 return CGF.Builder.CreateIsNotNull( 3386 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 3387 }); 3388 DoacrossCleanupScope.ForceCleanup(); 3389 // We're now done with the loop, so jump to the continuation block. 3390 if (ContBlock) { 3391 EmitBranch(ContBlock); 3392 EmitBlock(ContBlock, /*IsFinished=*/true); 3393 } 3394 } 3395 return HasLastprivateClause; 3396 } 3397 3398 /// The following two functions generate expressions for the loop lower 3399 /// and upper bounds in case of static and dynamic (dispatch) schedule 3400 /// of the associated 'for' or 'distribute' loop. 
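/// For example, for a loop with iteration space [0, N), emitForLoopBounds
/// below returns the LB/UB helper variables maintained by the runtime,
/// whereas emitDispatchForLoopBounds returns the constants 0 and the
/// LastIteration expression (an illustrative contrast of the two paths).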
static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  const auto &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  return {LB, UB};
}

/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
/// consider the lower and upper bound expressions generated by the
/// worksharing loop support, but instead use 0 and the iteration space size
/// as constants.
static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                          Address LB, Address UB) {
  const auto &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
  return {LBVal, UBVal};
}

/// Emits internal temp array declarations for the directive with inscan
/// reductions.
/// The code is the following:
/// \code
/// size num_iters = <num_iters>;
/// <type> buffer[num_iters];
/// \endcode
static void emitScanBasedDirectiveDecls(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
  }
  {
    // Emit buffers for each of the reduction variables.
    // ReductionCodeGen is required to correctly emit the code for array
    // reductions.
    ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
    unsigned Count = 0;
    auto *ITA = CopyArrayTemps.begin();
    for (const Expr *IRef : Privates) {
      const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      // Emit variably modified arrays, used for arrays/array sections
      // reductions.
      if (PrivateVD->getType()->isVariablyModifiedType()) {
        RedCG.emitSharedOrigLValue(CGF, Count);
        RedCG.emitAggregateType(CGF, Count);
      }
      CodeGenFunction::OpaqueValueMapping DimMapping(
          CGF,
          cast<OpaqueValueExpr>(
              cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
                  ->getSizeExpr()),
          RValue::get(OMPScanNumIterations));
      // Emit temp buffer.
      CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
      ++ITA;
      ++Count;
    }
  }
}

/// Emits the code for the directive with inscan reductions.
/// The code is the following:
/// \code
/// #pragma omp ...
/// for (i: 0..<num_iters>) {
///   <input phase>;
///   buffer[i] = red;
/// }
/// #pragma omp master // in parallel region
/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
/// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
///   buffer[cnt] op= buffer[cnt-pow(2,k)];
/// #pragma omp barrier // in parallel region
/// #pragma omp ...
/// for (0..<num_iters>) {
///   red = InclusiveScan ? buffer[i] : buffer[i-1];
///   <scan phase>;
/// }
/// \endcode
static void emitScanBasedDirective(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
    llvm::function_ref<void(CodeGenFunction &)> FirstGen,
    llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
  {
    // Emit loop with input phase:
    // #pragma omp ...
    // for (i: 0..<num_iters>) {
    //   <input phase>;
    //   buffer[i] = red;
    // }
    CGF.OMPFirstScanLoop = true;
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    FirstGen(CGF);
  }
  // #pragma omp barrier // in parallel region
  auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
                    &ReductionOps,
                    &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Emit prefix reduction:
    // #pragma omp master // in parallel region
    // for (int k = 0; k != ceil(log2(n)); ++k)
    llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
    llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
    llvm::Function *F =
        CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
    llvm::Value *Arg =
        CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
    llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
    F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
    LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
    LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
    llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
        OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
    auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
    CGF.EmitBlock(LoopBB);
    auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
    // size pow2k = 1;
    auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
    Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
    // for (size i = n - 1; i >= 2 ^ k; --i)
    //   tmp[i] op= tmp[i-pow2k];
    llvm::BasicBlock *InnerLoopBB =
        CGF.createBasicBlock("omp.inner.log.scan.body");
    llvm::BasicBlock *InnerExitBB =
        CGF.createBasicBlock("omp.inner.log.scan.exit");
    llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
    CGF.EmitBlock(InnerLoopBB);
    auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    IVal->addIncoming(NMin1, LoopBB);
    {
      CodeGenFunction::OMPPrivateScope PrivScope(CGF);
      auto *ILHS = LHSs.begin();
      auto *IRHS = RHSs.begin();
      for (const Expr *CopyArrayElem : CopyArrayElems) {
        const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
        Address LHSAddr = Address::invalid();
        {
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(IVal));
          LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
        }
        PrivScope.addPrivate(LHSVD, LHSAddr);
        Address RHSAddr = Address::invalid();
        {
          llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(OffsetIVal));
          RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
        }
        PrivScope.addPrivate(RHSVD, RHSAddr);
        ++ILHS;
        ++IRHS;
      }
      PrivScope.Privatize();
      CGF.CGM.getOpenMPRuntime().emitReduction(
          CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
          {/*WithNowait=*/true,
/*SimpleReduction=*/true, OMPD_unknown}); 3607 } 3608 llvm::Value *NextIVal = 3609 CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1)); 3610 IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock()); 3611 CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K); 3612 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); 3613 CGF.EmitBlock(InnerExitBB); 3614 llvm::Value *Next = 3615 CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1)); 3616 Counter->addIncoming(Next, CGF.Builder.GetInsertBlock()); 3617 // pow2k <<= 1; 3618 llvm::Value *NextPow2K = 3619 CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true); 3620 Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock()); 3621 llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal); 3622 CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB); 3623 auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc()); 3624 CGF.EmitBlock(ExitBB); 3625 }; 3626 if (isOpenMPParallelDirective(S.getDirectiveKind())) { 3627 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); 3628 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 3629 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 3630 /*ForceSimpleCall=*/true); 3631 } else { 3632 RegionCodeGenTy RCG(CodeGen); 3633 RCG(CGF); 3634 } 3635 3636 CGF.OMPFirstScanLoop = false; 3637 SecondGen(CGF); 3638 } 3639 3640 static bool emitWorksharingDirective(CodeGenFunction &CGF, 3641 const OMPLoopDirective &S, 3642 bool HasCancel) { 3643 bool HasLastprivates; 3644 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), 3645 [](const OMPReductionClause *C) { 3646 return C->getModifier() == OMPC_REDUCTION_inscan; 3647 })) { 3648 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { 3649 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 3650 OMPLoopScope LoopScope(CGF, S); 3651 return CGF.EmitScalarExpr(S.getNumIterations()); 3652 }; 3653 const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) { 3654 CodeGenFunction::OMPCancelStackRAII CancelRegion( 3655 CGF, S.getDirectiveKind(), HasCancel); 3656 (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 3657 emitForLoopBounds, 3658 emitDispatchForLoopBounds); 3659 // Emit an implicit barrier at the end. 
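      // (The input phase must be complete on all threads before the
      // prefix-sum pass reads the shared scan buffers, hence the
      // unconditional barrier here.)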
3660 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(), 3661 OMPD_for); 3662 }; 3663 const auto &&SecondGen = [&S, HasCancel, 3664 &HasLastprivates](CodeGenFunction &CGF) { 3665 CodeGenFunction::OMPCancelStackRAII CancelRegion( 3666 CGF, S.getDirectiveKind(), HasCancel); 3667 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 3668 emitForLoopBounds, 3669 emitDispatchForLoopBounds); 3670 }; 3671 if (!isOpenMPParallelDirective(S.getDirectiveKind())) 3672 emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen); 3673 emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen); 3674 } else { 3675 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), 3676 HasCancel); 3677 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 3678 emitForLoopBounds, 3679 emitDispatchForLoopBounds); 3680 } 3681 return HasLastprivates; 3682 } 3683 3684 static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) { 3685 if (S.hasCancel()) 3686 return false; 3687 for (OMPClause *C : S.clauses()) { 3688 if (isa<OMPNowaitClause>(C)) 3689 continue; 3690 3691 if (auto *SC = dyn_cast<OMPScheduleClause>(C)) { 3692 if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) 3693 return false; 3694 if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) 3695 return false; 3696 switch (SC->getScheduleKind()) { 3697 case OMPC_SCHEDULE_auto: 3698 case OMPC_SCHEDULE_dynamic: 3699 case OMPC_SCHEDULE_runtime: 3700 case OMPC_SCHEDULE_guided: 3701 case OMPC_SCHEDULE_static: 3702 continue; 3703 case OMPC_SCHEDULE_unknown: 3704 return false; 3705 } 3706 } 3707 3708 return false; 3709 } 3710 3711 return true; 3712 } 3713 3714 static llvm::omp::ScheduleKind 3715 convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) { 3716 switch (ScheduleClauseKind) { 3717 case OMPC_SCHEDULE_unknown: 3718 return llvm::omp::OMP_SCHEDULE_Default; 3719 case OMPC_SCHEDULE_auto: 3720 return llvm::omp::OMP_SCHEDULE_Auto; 3721 case OMPC_SCHEDULE_dynamic: 3722 return llvm::omp::OMP_SCHEDULE_Dynamic; 3723 case OMPC_SCHEDULE_guided: 3724 return llvm::omp::OMP_SCHEDULE_Guided; 3725 case OMPC_SCHEDULE_runtime: 3726 return llvm::omp::OMP_SCHEDULE_Runtime; 3727 case OMPC_SCHEDULE_static: 3728 return llvm::omp::OMP_SCHEDULE_Static; 3729 } 3730 llvm_unreachable("Unhandled schedule kind"); 3731 } 3732 3733 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 3734 bool HasLastprivates = false; 3735 bool UseOMPIRBuilder = 3736 CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S); 3737 auto &&CodeGen = [this, &S, &HasLastprivates, 3738 UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) { 3739 // Use the OpenMPIRBuilder if enabled. 3740 if (UseOMPIRBuilder) { 3741 bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>(); 3742 3743 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default; 3744 llvm::Value *ChunkSize = nullptr; 3745 if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) { 3746 SchedKind = 3747 convertClauseKindToSchedKind(SchedClause->getScheduleKind()); 3748 if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize()) 3749 ChunkSize = EmitScalarExpr(ChunkSizeExpr); 3750 } 3751 3752 // Emit the associated statement and get its loop representation. 
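      // E.g., for '#pragma omp for nowait' over a canonical loop, the nest
      // built below has depth 1 and NeedsBarrier is false, so
      // applyWorkshareLoop emits no trailing barrier.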
3753 const Stmt *Inner = S.getRawStmt(); 3754 llvm::CanonicalLoopInfo *CLI = 3755 EmitOMPCollapsedCanonicalLoopNest(Inner, 1); 3756 3757 llvm::OpenMPIRBuilder &OMPBuilder = 3758 CGM.getOpenMPRuntime().getOMPBuilder(); 3759 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( 3760 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); 3761 OMPBuilder.applyWorkshareLoop(Builder.getCurrentDebugLocation(), CLI, 3762 AllocaIP, NeedsBarrier, SchedKind, 3763 ChunkSize); 3764 return; 3765 } 3766 3767 HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel()); 3768 }; 3769 { 3770 auto LPCRegion = 3771 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 3772 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3773 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 3774 S.hasCancel()); 3775 } 3776 3777 if (!UseOMPIRBuilder) { 3778 // Emit an implicit barrier at the end. 3779 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) 3780 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); 3781 } 3782 // Check for outer lastprivate conditional update. 3783 checkForLastprivateConditionalUpdate(*this, S); 3784 } 3785 3786 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 3787 bool HasLastprivates = false; 3788 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 3789 PrePostActionTy &) { 3790 HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false); 3791 }; 3792 { 3793 auto LPCRegion = 3794 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 3795 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3796 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 3797 } 3798 3799 // Emit an implicit barrier at the end. 3800 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) 3801 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); 3802 // Check for outer lastprivate conditional update. 3803 checkForLastprivateConditionalUpdate(*this, S); 3804 } 3805 3806 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 3807 const Twine &Name, 3808 llvm::Value *Init = nullptr) { 3809 LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 3810 if (Init) 3811 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); 3812 return LVal; 3813 } 3814 3815 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 3816 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); 3817 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); 3818 bool HasLastprivates = false; 3819 auto &&CodeGen = [&S, CapturedStmt, CS, 3820 &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { 3821 const ASTContext &C = CGF.getContext(); 3822 QualType KmpInt32Ty = 3823 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3824 // Emit helper vars inits. 3825 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 3826 CGF.Builder.getInt32(0)); 3827 llvm::ConstantInt *GlobalUBVal = CS != nullptr 3828 ? CGF.Builder.getInt32(CS->size() - 1) 3829 : CGF.Builder.getInt32(0); 3830 LValue UB = 3831 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 3832 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 3833 CGF.Builder.getInt32(1)); 3834 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 3835 CGF.Builder.getInt32(0)); 3836 // Loop counter. 
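    // (IV selects which 'section' to execute; the body below is lowered to a
    // 'switch (IV)' over the section statements inside a static worksharing
    // loop.)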
3837 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 3838 OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 3839 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 3840 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 3841 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 3842 // Generate condition for loop. 3843 BinaryOperator *Cond = BinaryOperator::Create( 3844 C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary, 3845 S.getBeginLoc(), FPOptionsOverride()); 3846 // Increment for loop counter. 3847 UnaryOperator *Inc = UnaryOperator::Create( 3848 C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary, 3849 S.getBeginLoc(), true, FPOptionsOverride()); 3850 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { 3851 // Iterate through all sections and emit a switch construct: 3852 // switch (IV) { 3853 // case 0: 3854 // <SectionStmt[0]>; 3855 // break; 3856 // ... 3857 // case <NumSection> - 1: 3858 // <SectionStmt[<NumSection> - 1]>; 3859 // break; 3860 // } 3861 // .omp.sections.exit: 3862 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 3863 llvm::SwitchInst *SwitchStmt = 3864 CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()), 3865 ExitBB, CS == nullptr ? 1 : CS->size()); 3866 if (CS) { 3867 unsigned CaseNumber = 0; 3868 for (const Stmt *SubStmt : CS->children()) { 3869 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 3870 CGF.EmitBlock(CaseBB); 3871 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 3872 CGF.EmitStmt(SubStmt); 3873 CGF.EmitBranch(ExitBB); 3874 ++CaseNumber; 3875 } 3876 } else { 3877 llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case"); 3878 CGF.EmitBlock(CaseBB); 3879 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 3880 CGF.EmitStmt(CapturedStmt); 3881 CGF.EmitBranch(ExitBB); 3882 } 3883 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 3884 }; 3885 3886 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 3887 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 3888 // Emit implicit barrier to synchronize threads and avoid data races on 3889 // initialization of firstprivate variables and post-update of lastprivate 3890 // variables. 3891 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 3892 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 3893 /*ForceSimpleCall=*/true); 3894 } 3895 CGF.EmitOMPPrivateClause(S, LoopScope); 3896 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV); 3897 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 3898 CGF.EmitOMPReductionClauseInit(S, LoopScope); 3899 (void)LoopScope.Privatize(); 3900 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 3901 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 3902 3903 // Emit static non-chunked loop. 
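    // A rough sketch of the emitted pattern (illustrative only):
    //   __kmpc_for_static_init_4(...);            // static schedule
    //   UB = min(UB, GlobalUB); IV = LB;
    //   while (IV <= UB) { switch (IV) { ... }; ++IV; }
    //   __kmpc_for_static_fini(...);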
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
        LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit the barrier for lastprivates only if the 'sections' directive has a
  // 'nowait' clause. Otherwise the barrier will be generated by the codegen
  // for the directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit an implicit barrier to synchronize threads and avoid data races
    // on the final copy of the lastprivate variables.
3956 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), 3957 OMPD_unknown); 3958 } 3959 } 3960 3961 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 3962 if (CGM.getLangOpts().OpenMPIRBuilder) { 3963 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 3964 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 3965 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 3966 3967 auto FiniCB = [this](InsertPointTy IP) { 3968 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 3969 }; 3970 3971 const CapturedStmt *ICS = S.getInnermostCapturedStmt(); 3972 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); 3973 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); 3974 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; 3975 if (CS) { 3976 for (const Stmt *SubStmt : CS->children()) { 3977 auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP, 3978 InsertPointTy CodeGenIP, 3979 llvm::BasicBlock &FiniBB) { 3980 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, 3981 FiniBB); 3982 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SubStmt, CodeGenIP, 3983 FiniBB); 3984 }; 3985 SectionCBVector.push_back(SectionCB); 3986 } 3987 } else { 3988 auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP, 3989 InsertPointTy CodeGenIP, 3990 llvm::BasicBlock &FiniBB) { 3991 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); 3992 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CapturedStmt, CodeGenIP, 3993 FiniBB); 3994 }; 3995 SectionCBVector.push_back(SectionCB); 3996 } 3997 3998 // Privatization callback that performs appropriate action for 3999 // shared/private/firstprivate/lastprivate/copyin/... variables. 4000 // 4001 // TODO: This defaults to shared right now. 4002 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 4003 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { 4004 // The next line is appropriate only for variables (Val) with the 4005 // data-sharing attribute "shared". 4006 ReplVal = &Val; 4007 4008 return CodeGenIP; 4009 }; 4010 4011 CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP); 4012 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); 4013 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( 4014 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); 4015 Builder.restoreIP(OMPBuilder.createSections( 4016 Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(), 4017 S.getSingleClause<OMPNowaitClause>())); 4018 return; 4019 } 4020 { 4021 auto LPCRegion = 4022 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4023 OMPLexicalScope Scope(*this, S, OMPD_unknown); 4024 EmitSections(S); 4025 } 4026 // Emit an implicit barrier at the end. 4027 if (!S.getSingleClause<OMPNowaitClause>()) { 4028 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), 4029 OMPD_sections); 4030 } 4031 // Check for outer lastprivate conditional update. 
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                   InsertPointTy CodeGenIP,
                                                   llvm::BasicBlock &FiniBB) {
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SectionRegionBodyStmt,
                                             CodeGenIP, FiniBB);
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  EmitStmt(S.getAssociatedStmt());
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination>=<source> expressions).
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid data races on firstprivate
  // init). It is skipped if a 'nowait' clause is present or if a
  // 'copyprivate' clause is used, since the 'copyprivate' runtime call
  // performs its own synchronization.
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getBeginLoc(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
  // Check for outer lastprivate conditional update.
4106 checkForLastprivateConditionalUpdate(*this, S); 4107 } 4108 4109 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 4110 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4111 Action.Enter(CGF); 4112 CGF.EmitStmt(S.getRawStmt()); 4113 }; 4114 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); 4115 } 4116 4117 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 4118 if (CGM.getLangOpts().OpenMPIRBuilder) { 4119 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4120 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4121 4122 const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt(); 4123 4124 auto FiniCB = [this](InsertPointTy IP) { 4125 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4126 }; 4127 4128 auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP, 4129 InsertPointTy CodeGenIP, 4130 llvm::BasicBlock &FiniBB) { 4131 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); 4132 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt, 4133 CodeGenIP, FiniBB); 4134 }; 4135 4136 LexicalScope Scope(*this, S.getSourceRange()); 4137 EmitStopPoint(&S); 4138 Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB)); 4139 4140 return; 4141 } 4142 LexicalScope Scope(*this, S.getSourceRange()); 4143 EmitStopPoint(&S); 4144 emitMaster(*this, S); 4145 } 4146 4147 static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 4148 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4149 Action.Enter(CGF); 4150 CGF.EmitStmt(S.getRawStmt()); 4151 }; 4152 Expr *Filter = nullptr; 4153 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) 4154 Filter = FilterClause->getThreadID(); 4155 CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(), 4156 Filter); 4157 } 4158 4159 void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) { 4160 if (CGM.getLangOpts().OpenMPIRBuilder) { 4161 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4162 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4163 4164 const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt(); 4165 const Expr *Filter = nullptr; 4166 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) 4167 Filter = FilterClause->getThreadID(); 4168 llvm::Value *FilterVal = Filter 4169 ? 
EmitScalarExpr(Filter, CGM.Int32Ty) 4170 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); 4171 4172 auto FiniCB = [this](InsertPointTy IP) { 4173 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4174 }; 4175 4176 auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP, 4177 InsertPointTy CodeGenIP, 4178 llvm::BasicBlock &FiniBB) { 4179 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); 4180 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MaskedRegionBodyStmt, 4181 CodeGenIP, FiniBB); 4182 }; 4183 4184 LexicalScope Scope(*this, S.getSourceRange()); 4185 EmitStopPoint(&S); 4186 Builder.restoreIP( 4187 OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal)); 4188 4189 return; 4190 } 4191 LexicalScope Scope(*this, S.getSourceRange()); 4192 EmitStopPoint(&S); 4193 emitMasked(*this, S); 4194 } 4195 4196 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 4197 if (CGM.getLangOpts().OpenMPIRBuilder) { 4198 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4199 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4200 4201 const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt(); 4202 const Expr *Hint = nullptr; 4203 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) 4204 Hint = HintClause->getHint(); 4205 4206 // TODO: This is slightly different from what's currently being done in 4207 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything 4208 // about typing is final. 4209 llvm::Value *HintInst = nullptr; 4210 if (Hint) 4211 HintInst = 4212 Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false); 4213 4214 auto FiniCB = [this](InsertPointTy IP) { 4215 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4216 }; 4217 4218 auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP, 4219 InsertPointTy CodeGenIP, 4220 llvm::BasicBlock &FiniBB) { 4221 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); 4222 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt, 4223 CodeGenIP, FiniBB); 4224 }; 4225 4226 LexicalScope Scope(*this, S.getSourceRange()); 4227 EmitStopPoint(&S); 4228 Builder.restoreIP(OMPBuilder.createCritical( 4229 Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(), 4230 HintInst)); 4231 4232 return; 4233 } 4234 4235 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4236 Action.Enter(CGF); 4237 CGF.EmitStmt(S.getAssociatedStmt()); 4238 }; 4239 const Expr *Hint = nullptr; 4240 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) 4241 Hint = HintClause->getHint(); 4242 LexicalScope Scope(*this, S.getSourceRange()); 4243 EmitStopPoint(&S); 4244 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 4245 S.getDirectiveName().getAsString(), 4246 CodeGen, S.getBeginLoc(), Hint); 4247 } 4248 4249 void CodeGenFunction::EmitOMPParallelForDirective( 4250 const OMPParallelForDirective &S) { 4251 // Emit directive as a combined directive that consists of two implicit 4252 // directives: 'parallel' with 'for' directive. 
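  // E.g., '#pragma omp parallel for reduction(inscan, +: x)' additionally
  // requires the scan buffer declarations to be emitted before the parallel
  // region; that special case is handled first below.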
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    (void)emitWorksharingDirective(CGF, S, S.hasCancel());
  };
  {
    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                     [](const OMPReductionClause *C) {
                       return C->getModifier() == OMPC_REDUCTION_inscan;
                     })) {
      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
        CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
        CGCapturedStmtInfo CGSI(CR_OpenMP);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
        OMPLoopScope LoopScope(CGF, S);
        return CGF.EmitScalarExpr(S.getNumIterations());
      };
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
    }
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for simd' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                     [](const OMPReductionClause *C) {
                       return C->getModifier() == OMPC_REDUCTION_inscan;
                     })) {
      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
        CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
        CGCapturedStmtInfo CGSI(CR_OpenMP);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
        OMPLoopScope LoopScope(CGF, S);
        return CGF.EmitScalarExpr(S.getNumIterations());
      };
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
    }
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelMasterDirective(
    const OMPParallelMasterDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'master' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // on propagation of the master thread's values of threadprivate
      // variables to the local instances of those variables in all other
      // implicit threads.
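      // E.g. given 'int tp; #pragma omp threadprivate(tp)' and a 'copyin(tp)'
      // clause, every implicit thread must observe the master thread's value
      // of 'tp' before the region body starts, hence the barrier emitted
      // below.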
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    emitMaster(CGF, S);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitSections(S);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

namespace {
/// Get the list of variables declared in the context of the untied tasks.
class CheckVarsEscapingUntiedTaskDeclContext final
    : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
  llvm::SmallVector<const VarDecl *, 4> PrivateDecls;

public:
  explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
  virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
  void VisitDeclStmt(const DeclStmt *S) {
    if (!S)
      return;
    // Need to privatize only local vars; static locals can be processed as is.
    for (const Decl *D : S->decls()) {
      if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
        if (VD->hasLocalStorage())
          PrivateDecls.push_back(VD);
    }
  }
  void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
  void VisitCapturedStmt(const CapturedStmt *) {}
  void VisitLambdaExpr(const LambdaExpr *) {}
  void VisitBlockExpr(const BlockExpr *) {}
  void VisitStmt(const Stmt *S) {
    if (!S)
      return;
    for (const Stmt *Child : S->children())
      if (Child)
        Visit(Child);
  }

  /// Returns the list of collected local variables.
  ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
};
} // anonymous namespace

void CodeGenFunction::EmitOMPTaskBasedDirective(
    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
    OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  // Check if the task is final.
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
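    // E.g. 'final(1)' folds to a constant and is encoded directly in the task
    // flags, while 'final(n > 10)' is evaluated here and passed to the runtime
    // as a boolean value.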
    const Expr *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    const Expr *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
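  // E.g. on '#pragma omp taskloop lastprivate(x)', 'x' is privatized in each
  // task and the mapping recorded below is used to copy the value from the
  // last iteration back into the original 'x'.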
  llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
                         cast<DeclRefExpr>(*IRef)));
      ++IRef;
      ++ID;
    }
  }
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
    Data.ReductionOps.append(C->reduction_ops().begin(),
                             C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getBeginLoc(), LHSs, RHSs, Data);
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    OMPTaskDataTy::DependData &DD =
        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
  }
  // Get list of local vars for untied tasks.
  if (!Data.Tied) {
    CheckVarsEscapingUntiedTaskDeclContext Checker;
    Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
    Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
                              Checker.getPrivateDecls().end());
  }
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
                    CapturedRegion](CodeGenFunction &CGF,
                                    PrePostActionTy &Action) {
    llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                    std::pair<Address, Address>>
        UntiedLocalVars;
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    // Generate debug info for variables present in shared clause.
    if (auto *DI = CGF.getDebugInfo()) {
      llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields =
          CGF.CapturedStmtInfo->getCaptureFields();
      llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue();
      if (CaptureFields.size() && ContextValue) {
        unsigned CharWidth = CGF.getContext().getCharWidth();
        // The shared variables are packed together as members of a structure,
        // so the address of each shared variable can be computed by adding its
        // offset (within the record) to the base address of the record. For
        // each shared variable, the debug intrinsic llvm.dbg.declare is
        // generated with the appropriate expression (DIExpression).
        // Ex:
        //  %12 = load %struct.anon*, %struct.anon** %__context.addr.i
        //  call void @llvm.dbg.declare(metadata %struct.anon* %12,
        //            metadata !svar1,
        //            metadata !DIExpression(DW_OP_deref))
        //  call void @llvm.dbg.declare(metadata %struct.anon* %12,
        //            metadata !svar2,
        //            metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
        for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) {
          const VarDecl *SharedVar = It->first;
          RecordDecl *CaptureRecord = It->second->getParent();
          const ASTRecordLayout &Layout =
              CGF.getContext().getASTRecordLayout(CaptureRecord);
          unsigned Offset =
              Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth;
          if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
            (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue,
                                                CGF.Builder, false);
          llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back();
          // Get the call dbg.declare instruction we just created and update
          // its DIExpression to add the offset to the base address.
          if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&Last)) {
            SmallVector<uint64_t, 8> Ops;
            // Add the offset to the base address if it is non-zero.
            if (Offset) {
              Ops.push_back(llvm::dwarf::DW_OP_plus_uconst);
              Ops.push_back(Offset);
            }
            Ops.push_back(llvm::dwarf::DW_OP_deref);
            auto &Ctx = DDI->getContext();
            llvm::DIExpression *DIExpr = llvm::DIExpression::get(Ctx, Ops);
            Last.setOperand(2, llvm::MetadataAsValue::get(Ctx, DIExpr));
          }
        }
      }
    }
    llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
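      // The runtime copy function fills one pointer slot per privatized
      // variable; e.g. with 'private(a) firstprivate(b)' the call below
      // receives the privates block plus the '.priv.ptr.addr' and
      // '.firstpriv.ptr.addr' slots created here.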
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(PrivatesPtr);
      ParamTypes.push_back(PrivatesPtr->getType());
      for (const Expr *E : Data.PrivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        FirstprivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const Expr *E : Data.LastprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const VarDecl *VD : Data.PrivateLocals) {
        QualType Ty = VD->getType().getNonReferenceType();
        if (VD->getType()->isLValueReferenceType())
          Ty = CGF.getContext().getPointerType(Ty);
        if (isAllocatableDecl(VD))
          Ty = CGF.getContext().getPointerType(Ty);
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
        auto Result = UntiedLocalVars.insert(
            std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
        // If the key already exists, update in place.
        if (!Result.second)
          *Result.first = std::make_pair(
              VD, std::make_pair(PrivatePtr, Address::invalid()));
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
                                               ParamTypes, /*isVarArg=*/false);
      CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CopyFn, CopyFnTy->getPointerTo());
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : LastprivateDstsOrigs) {
        const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        Pair.second->getType(), VK_LValue,
                        Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, CGF.EmitLValue(&DRE).getAddress(CGF));
      }
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement =
            Address::deprecated(CGF.Builder.CreateLoad(Pair.second),
                                CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, Replacement);
        if (auto *DI = CGF.getDebugInfo())
          if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
            (void)DI->EmitDeclareOfAutoVariable(
                Pair.first, Pair.second.getPointer(), CGF.Builder,
                /*UsePointerValue*/ true);
      }
      // Adjust mapping for internal locals by mapping actual memory instead of
      // a pointer to this memory.
      for (auto &Pair : UntiedLocalVars) {
        if (isAllocatableDecl(Pair.first)) {
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
          Address Replacement = Address::deprecated(Ptr, CGF.getPointerAlign());
          Pair.second.first = Replacement;
          Ptr = CGF.Builder.CreateLoad(Replacement);
          Replacement = Address::deprecated(
              Ptr, CGF.getContext().getDeclAlign(Pair.first));
          Pair.second.second = Replacement;
        } else {
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
          Address Replacement = Address::deprecated(
              Ptr, CGF.getContext().getDeclAlign(Pair.first));
          Pair.second.first = Replacement;
        }
      }
    }
    if (Data.Reductions) {
      OMPPrivateScope FirstprivateScope(CGF);
      for (const auto &Pair : FirstprivatePtrs) {
        Address Replacement =
            Address::deprecated(CGF.Builder.CreateLoad(Pair.second),
                                CGF.getContext().getDeclAlign(Pair.first));
        FirstprivateScope.addPrivate(Pair.first, Replacement);
      }
      (void)FirstprivateScope.Privatize();
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
                             Data.ReductionCopies, Data.ReductionOps);
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address::deprecated(
            CGF.EmitScalarConversion(Replacement.getPointer(),
                                     CGF.getContext().VoidPtrTy,
                                     CGF.getContext().getPointerType(
                                         Data.ReductionCopies[Cnt]->getType()),
                                     Data.ReductionCopies[Cnt]->getExprLoc()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const Expr *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate
        // and privatized already during processing of the firstprivates.
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        llvm::Value *ReductionsPtr;
        if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
          ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
                                               TRExpr->getExprLoc());
        } else {
          ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
        }
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address::deprecated(
            CGF.EmitScalarConversion(
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                InRedPrivs[Cnt]->getExprLoc()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
      }
    }
    (void)InRedScope.Privatize();

    CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
                                                             UntiedLocalVars);
    Action.Enter(CGF);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S, llvm::None,
                        !isOpenMPParallelDirective(S.getDirectiveKind()) &&
                            !isOpenMPSimdDirective(S.getDirectiveKind()));
  TaskGen(*this, OutlinedFn, Data);
}

static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                           ImplicitParamDecl::Other);
  auto *OrigRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                              ImplicitParamDecl::Other);
  auto *PrivateRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  QualType ElemType = C.getBaseElementType(Ty);
  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
                                           ImplicitParamDecl::Other);
  auto *InitRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
  PrivateVD->setInitStyle(VarDecl::CInit);
  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
                                              InitRef, /*BasePath=*/nullptr,
                                              VK_PRValue, FPOptionsOverride()));
  Data.FirstprivateVars.emplace_back(OrigRef);
  Data.FirstprivateCopies.emplace_back(PrivateRef);
  Data.FirstprivateInits.emplace_back(InitRef);
  return OrigVD;
}

void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
    OMPTargetDataInfo &InputInfo) {
  // Emit outlined function for task construct.
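  // The offloading arrays (base pointers, pointers, sizes and, if present,
  // mappers) are captured below as implicit firstprivates so that a deferred
  // target task, e.g. '#pragma omp target nowait', can rebuild its arguments
  // after the parent frame is gone.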
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  OMPTaskDataTy Data;
  // The task is not final.
  Data.Final.setInt(/*IntVal=*/false);
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      Data.FirstprivateVars.push_back(*IRef);
      Data.FirstprivateCopies.push_back(IInit);
      Data.FirstprivateInits.push_back(*IElemInitRef);
      ++IRef;
      ++IElemInitRef;
    }
  }
  OMPPrivateScope TargetScope(*this);
  VarDecl *BPVD = nullptr;
  VarDecl *PVD = nullptr;
  VarDecl *SVD = nullptr;
  VarDecl *MVD = nullptr;
  if (InputInfo.NumberOfTargetItems > 0) {
    auto *CD = CapturedDecl::Create(
        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
    QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
        getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    BPVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
    PVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
    QualType SizesType = getContext().getConstantArrayType(
        getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
        ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
                                            S.getBeginLoc());
    TargetScope.addPrivate(BPVD, InputInfo.BasePointersArray);
    TargetScope.addPrivate(PVD, InputInfo.PointersArray);
    TargetScope.addPrivate(SVD, InputInfo.SizesArray);
    // If there is no user-defined mapper, the mapper array will be nullptr. In
    // this case, we don't need to privatize it.
    if (!isa_and_nonnull<llvm::ConstantPointerNull>(
            InputInfo.MappersArray.getPointer())) {
      MVD = createImplicitFirstprivateForType(
          getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
      TargetScope.addPrivate(MVD, InputInfo.MappersArray);
    }
  }
  (void)TargetScope.Privatize();
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    OMPTaskDataTy::DependData &DD =
        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
  }
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.FirstprivateVars.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(PrivatesPtr);
      ParamTypes.push_back(PrivatesPtr->getType());
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
                                               ParamTypes, /*isVarArg=*/false);
      CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CopyFn, CopyFnTy->getPointerTo());
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement =
            Address::deprecated(CGF.Builder.CreateLoad(Pair.second),
                                CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, Replacement);
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    if (InputInfo.NumberOfTargetItems > 0) {
      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
      // If MVD is nullptr, the mapper array is not privatized.
      if (MVD)
        InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
            CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
    }

    Action.Enter(CGF);
    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
      Data.NumberOfParts);
  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
  IntegerLiteral IfCond(getContext(), TrueOrFalse,
                        getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
                        SourceLocation());

  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
                                      SharedsTy, CapturedStruct, &IfCond, Data);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
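  // E.g. '#pragma omp task untied' clears Data.Tied, which changes the part-id
  // handling in the outlined function and enables the privatization of local
  // variables that must survive task scheduling points.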
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  OMPTaskDataTy Data;
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    OMPTaskDataTy::DependData &DD =
        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
  }
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data);
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
        Data.ReductionOps.append(C->reduction_ops().begin(),
                                 C->reduction_ops().end());
        LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
        RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
                                                           LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>() ?
      llvm::AtomicOrdering::NotAtomic
      : llvm::AtomicOrdering::AcquireRelease;
  CGM.getOpenMPRuntime().emitFlush(
      *this,
      [&S]() -> ArrayRef<const Expr *> {
        if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
          return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                    FlushClause->varlist_end());
        return llvm::None;
      }(),
      S.getBeginLoc(), AO);
}

void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
  const auto *DO = S.getSingleClause<OMPDepobjClause>();
  LValue DOLVal = EmitLValue(DO->getDepobj());
  if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
    OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
                                           DC->getModifier());
    Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
    Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
        *this, Dependencies, DC->getBeginLoc());
    EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
    return;
  }
  if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
    CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
    return;
  }
  if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
    CGM.getOpenMPRuntime().emitUpdateClause(
        *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
    return;
  }
}

void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
  if (!OMPParentLoopDirectiveForScan)
    return;
  const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
  bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
    if (C->getModifier() != OMPC_REDUCTION_inscan)
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  if (ParentDir.getDirectiveKind() == OMPD_simd ||
      (getLangOpts().OpenMPSimd &&
       isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
    // For simd directive and simd-based directives in simd only mode, use the
    // following codegen:
    //  int x = 0;
    //  #pragma omp simd reduction(inscan, +: x)
    //  for (..) {
    //    <first part>
    //    #pragma omp scan inclusive(x)
    //    <second part>
    //  }
    // is transformed to:
    //  int x = 0;
    //  for (..) {
    //    int x_priv = 0;
    //    <first part>
    //    x = x_priv + x;
    //    x_priv = x;
    //    <second part>
    //  }
    // and
    //  int x = 0;
    //  #pragma omp simd reduction(inscan, +: x)
    //  for (..)
    //  {
    //    <first part>
    //    #pragma omp scan exclusive(x)
    //    <second part>
    //  }
    // to
    //  int x = 0;
    //  for (..) {
    //    int x_priv = 0;
    //    <second part>
    //    int temp = x;
    //    x = x_priv + x;
    //    x_priv = temp;
    //    <first part>
    //  }
    llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
    EmitBranch(IsInclusive
                   ? OMPScanReduce
                   : BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(OMPScanDispatch);
    {
      // New scope for correct construction/destruction of temp variables for
      // exclusive scan.
      LexicalScope Scope(*this, S.getSourceRange());
      EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
      EmitBlock(OMPScanReduce);
      if (!IsInclusive) {
        // Create temp var and copy LHS value to this temp value.
        // TMP = LHS;
        for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
          const Expr *PrivateExpr = Privates[I];
          const Expr *TempExpr = CopyArrayTemps[I];
          EmitAutoVarDecl(
              *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
          LValue DestLVal = EmitLValue(TempExpr);
          LValue SrcLVal = EmitLValue(LHSs[I]);
          EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                      SrcLVal.getAddress(*this),
                      cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                      cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                      CopyOps[I]);
        }
      }
      CGM.getOpenMPRuntime().emitReduction(
          *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
      for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
        const Expr *PrivateExpr = Privates[I];
        LValue DestLVal;
        LValue SrcLVal;
        if (IsInclusive) {
          DestLVal = EmitLValue(RHSs[I]);
          SrcLVal = EmitLValue(LHSs[I]);
        } else {
          const Expr *TempExpr = CopyArrayTemps[I];
          DestLVal = EmitLValue(RHSs[I]);
          SrcLVal = EmitLValue(TempExpr);
        }
        EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                    SrcLVal.getAddress(*this),
                    cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                    cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                    CopyOps[I]);
      }
    }
    EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
    OMPScanExitBlock = IsInclusive
                           ? BreakContinueStack.back().ContinueBlock.getBlock()
                           : OMPScanReduce;
    EmitBlock(OMPAfterScanBlock);
    return;
  }
  if (!IsInclusive) {
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(OMPScanExitBlock);
  }
  if (OMPFirstScanLoop) {
    // Emit buffer[i] = red; at the end of the input phase.
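    // E.g. for 'reduction(inscan, +: x)' the input-phase loop stores each
    // iteration's partial value of 'x' into the temporary copy array at the
    // index of the current iteration.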
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue DestLVal = EmitLValue(CopyArrayElem);
      LValue SrcLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                  SrcLVal.getAddress(*this),
                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                  CopyOps[I]);
    }
  }
  EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  if (IsInclusive) {
    EmitBlock(OMPScanExitBlock);
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  }
  EmitBlock(OMPScanDispatch);
  if (!OMPFirstScanLoop) {
    // Emit red = buffer[i]; at the entrance to the scan phase.
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    llvm::BasicBlock *ExclusiveExitBB = nullptr;
    if (!IsInclusive) {
      llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
      ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
      llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
      Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
      EmitBlock(ContBB);
      // Use idx - 1 iteration for exclusive scan.
      IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
    }
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue SrcLVal = EmitLValue(CopyArrayElem);
      LValue DestLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                  SrcLVal.getAddress(*this),
                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                  CopyOps[I]);
    }
    if (!IsInclusive) {
      EmitBlock(ExclusiveExitBB);
    }
  }
  EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
                                               : OMPAfterScanBlock);
  EmitBlock(OMPAfterScanBlock);
}

void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    // Emit 'then' code.
    {
      // Emit helper vars inits.

      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind()))
        EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const Expr *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getBeginLoc());
        }
      } else {
        // Default behaviour for dist_schedule clause.
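        // With no dist_schedule clause the runtime picks the schedule; e.g.
        // most host targets default to static with no chunk, giving each team
        // one contiguous chunk of the iteration space.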
        CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
            *this, S, ScheduleKind, Chunk);
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      bool StaticChunked =
          RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr) ||
          StaticChunked) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            StaticChunked ? Chunk : nullptr);
        RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                    StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        const Expr *Cond =
            isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                ? S.getCombinedCond()
                : S.getCond();

        if (StaticChunked)
          Cond = S.getCombinedDistCond();

        // For static unchunked schedules generate:
        //
        //  1. For distribute alone, codegen
        //    while (idx <= UB) {
        //      BODY;
        //      ++idx;
        //    }
        //
        //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
        //    while (idx <= UB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      idx += ST;
        //    }
        //
        // For static chunked schedules generate:
        //
        //  while (IV <= GlobalUB) {
        //    <CodeGen rest of pragma>(LB, UB);
        //    LB += ST;
        //    UB += ST;
        //    UB = min(UB, GlobalUB);
        //    IV = LB;
        //  }
        //
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind()))
                CGF.EmitOMPSimdInit(S);
            },
            [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
             StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(), Cond, IncExpr,
                  [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                    CodeGenLoop(CGF, S, LoopExit);
                  },
                  [&S, StaticChunked](CodeGenFunction &CGF) {
                    if (StaticChunked) {
                      CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
                      CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedInit());
                    }
                  });
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // the runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            IL.getAddress(*this), Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind())) {
        EmitOMPReductionClauseFinal(S, OMPD_simd);
        // Emit post-update of the reduction variables if IsLastIter != 0.
        emitPostUpdateForReductionClause(
            *this, S, [IL, &S](CodeGenFunction &CGF) {
              return CGF.Builder.CreateIsNotNull(
                  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
            });
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}

void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S,
                                                   SourceLocation Loc) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
  Fn->setDoesNotRecurse();
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    if (S.hasClausesOfKind<OMPDependClause>()) {
      // The ordered directive with depend clause.
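      // E.g. '#pragma omp ordered depend(source)' posts the current iteration
      // vector and 'depend(sink : i-1)' waits on it; each clause lowers to a
      // doacross post/wait call on a packed counter vector.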
      assert(!S.hasAssociatedStmt() &&
             "No associated statement must be in ordered depend construct.");
      InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
                             AllocaInsertPt->getIterator());
      for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) {
        unsigned NumLoops = DC->getNumLoops();
        QualType Int64Ty = CGM.getContext().getIntTypeForBitwidth(
            /*DestWidth=*/64, /*Signed=*/1);
        llvm::SmallVector<llvm::Value *> StoreValues;
        for (unsigned I = 0; I < NumLoops; I++) {
          const Expr *CounterVal = DC->getLoopData(I);
          assert(CounterVal);
          llvm::Value *StoreValue = EmitScalarConversion(
              EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
              CounterVal->getExprLoc());
          StoreValues.emplace_back(StoreValue);
        }
        bool IsDependSource = false;
        if (DC->getDependencyKind() == OMPC_DEPEND_source)
          IsDependSource = true;
        Builder.restoreIP(OMPBuilder.createOrderedDepend(
            Builder, AllocaIP, NumLoops, StoreValues, ".cnt.addr",
            IsDependSource));
      }
    } else {
      // The ordered directive with threads or simd clause, or without clause.
      // Without clause, it behaves as if the threads clause is specified.
      const auto *C = S.getSingleClause<OMPSIMDClause>();

      auto FiniCB = [this](InsertPointTy IP) {
        OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
      };

      auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
                                     InsertPointTy CodeGenIP,
                                     llvm::BasicBlock &FiniBB) {
        const CapturedStmt *CS = S.getInnermostCapturedStmt();
        if (C) {
          llvm::SmallVector<llvm::Value *, 16> CapturedVars;
          GenerateOpenMPCapturedVars(*CS, CapturedVars);
          llvm::Function *OutlinedFn =
              emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
          assert(S.getBeginLoc().isValid() &&
                 "Outlined function call location must be valid.");
          ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc());
          OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, FiniBB,
                                               OutlinedFn, CapturedVars);
        } else {
          OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP,
                                                         FiniBB);
          OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CS->getCapturedStmt(),
                                                 CodeGenIP, FiniBB);
        }
      };

      OMPLexicalScope Scope(*this, S, OMPD_unknown);
      Builder.restoreIP(
          OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
    }
    return;
  }

  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "No associated statement must be in ordered depend construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      llvm::Function *OutlinedFn =
          emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
}

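// Conversion helpers for '#pragma omp atomic': the value loaded from or stored
// to 'x' may need a scalar or complex conversion between the expression type
// and the memory type, e.g. 'double v; int x; #pragma omp atomic read'
// converts the loaded 'int' to 'double' before storing it into 'v'.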
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                                   DestType, Loc)
                        : CGF.EmitComplexToScalarConversion(
                              Val.getComplexVal(), SrcType, DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    llvm::Value *ScalarVal = CGF.EmitScalarConversion(
        Val.getScalarVal(), SrcType, DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg())
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  else
    CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
}

static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, LValue LVal,
                                   SourceLocation Loc) {
  if (LVal.isGlobalReg())
    return CGF.EmitLoadOfLValue(LVal, Loc);
  return CGF.EmitAtomicLoad(
      LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
      LVal.isVolatile());
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
  // OpenMP, 2.17.7, atomic Construct
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::Monotonic:
  case llvm::AtomicOrdering::Release:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
}

static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, const Expr *X,
                                   const Expr *E, SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomic operations are supported for
  // the given type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress(CGF).getElementType())) ||
      !X.getAddress(CGF).getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress(CGF).getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  llvm::Value *Res =
      CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> CommonGen) {
  // Update expressions are allowed to have the following forms:
  //  x binop= expr; -> xrval binop expr;
  //  x++, ++x -> xrval + 1;
  //  x--, --x -> xrval - 1;
  //  x = x binop expr; -> xrval binop expr;
  //  x = expr Op x; -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
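      // Roughly (illustrative pseudo-IR only; the real emission lives in
      // EmitAtomicUpdate):
      //   old = load atomic x
      // retry:
      //   new = CommonGen(old)
      //   old, ok = cmpxchg x, old, new
      //   br ok ? done : retry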
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
                                    llvm::AtomicOrdering AO, const Expr *X,
                                    const Expr *E, const Expr *UE,
                                    bool IsXLHSInRHSPart, SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  //  x binop= expr; -> xrval binop expr;
  //  x++, ++x -> xrval + 1;
  //  x--, --x -> xrval - 1;
  //  x = x binop expr; -> xrval binop expr;
  //  x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
                                     llvm::AtomicOrdering AO,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    //  x binop= expr; -> xrval binop expr;
    //  x++, ++x -> xrval + 1;
    //  x--, --x -> xrval - 1;
    //  x = x binop expr; -> xrval binop expr;
    //  x = expr Op x; -> expr binop xrval;
    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
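        // For example (illustrative): for 'v = ++x;' lowered to
        // 'old = atomicrmw add x, 1', the captured value is recomputed here
        // as 'old + 1' by re-evaluating UE with its opaque operands bound to
        // the values captured above.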
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
  // OpenMP 5.1 removes the required flush for the capture clause.
  if (CGF.CGM.getLangOpts().OpenMP < 51) {
    // OpenMP, 2.17.7, atomic Construct
    // If the write, update, or capture clause is specified and the release,
    // acq_rel, or seq_cst clause is specified then the strong flush on entry
    // to the atomic operation is also a release flush.
    // If the read or capture clause is specified and the acquire, acq_rel, or
    // seq_cst clause is specified then the strong flush on exit from the
    // atomic operation is also an acquire flush.
    switch (AO) {
    case llvm::AtomicOrdering::Release:
      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                           llvm::AtomicOrdering::Release);
      break;
    case llvm::AtomicOrdering::Acquire:
      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                           llvm::AtomicOrdering::Acquire);
      break;
    case llvm::AtomicOrdering::AcquireRelease:
    case llvm::AtomicOrdering::SequentiallyConsistent:
      CGF.CGM.getOpenMPRuntime().emitFlush(
          CGF, llvm::None, Loc, llvm::AtomicOrdering::AcquireRelease);
      break;
    case llvm::AtomicOrdering::Monotonic:
      break;
    case llvm::AtomicOrdering::NotAtomic:
    case llvm::AtomicOrdering::Unordered:
      llvm_unreachable("Unexpected ordering.");
    }
  }
}

static void emitOMPAtomicCompareExpr(CodeGenFunction &CGF,
                                     llvm::AtomicOrdering AO, const Expr *X,
                                     const Expr *E, const Expr *D,
                                     const Expr *CE, bool IsXBinopExpr,
                                     SourceLocation Loc) {
  llvm::OpenMPIRBuilder &OMPBuilder =
      CGF.CGM.getOpenMPRuntime().getOMPBuilder();

  OMPAtomicCompareOp Op;
  assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
  switch (cast<BinaryOperator>(CE)->getOpcode()) {
  case BO_EQ:
    Op = OMPAtomicCompareOp::EQ;
    break;
  case BO_LT:
    Op = OMPAtomicCompareOp::MIN;
    break;
  case BO_GT:
    Op = OMPAtomicCompareOp::MAX;
    break;
  default:
    llvm_unreachable("unsupported atomic compare binary operator");
  }

  LValue XLVal = CGF.EmitLValue(X);
  Address XAddr = XLVal.getAddress(CGF);
  llvm::Value *EVal = CGF.EmitScalarExpr(E);
  llvm::Value *DVal = D ? CGF.EmitScalarExpr(D) : nullptr;

  llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
      XAddr.getPointer(), XAddr.getElementType(),
      X->getType().isVolatileQualified(),
      X->getType()->hasSignedIntegerRepresentation()};

  CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
      CGF.Builder, XOpVal, EVal, DVal, AO, Op, IsXBinopExpr));
}

static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              llvm::AtomicOrdering AO, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, const Expr *D, const Expr *CE,
                              bool IsXLHSInRHSPart, bool IsCompareCapture,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
    break;
  case OMPC_write:
    emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_compare: {
    if (IsCompareCapture) {
      // Emit an error here.
      unsigned DiagID = CGF.CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "'atomic compare capture' is not supported for now");
      CGF.CGM.getDiags().Report(DiagID);
    } else {
      emitOMPAtomicCompareExpr(CGF, AO, X, E, D, CE, IsXLHSInRHSPart, Loc);
    }
    break;
  }
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_sizes:
  case OMPC_full:
  case OMPC_partial:
  case OMPC_allocator:
  case OMPC_allocate:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_acq_rel:
  case OMPC_acquire:
  case OMPC_release:
  case OMPC_relaxed:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_depobj:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_use_device_addr:
  case OMPC_is_device_ptr:
  case OMPC_unified_address:
  case OMPC_unified_shared_memory:
  case OMPC_reverse_offload:
  case OMPC_dynamic_allocators:
  case OMPC_atomic_default_mem_order:
  case OMPC_device_type:
  case OMPC_match:
  case OMPC_nontemporal:
  case OMPC_order:
  case OMPC_destroy:
  case OMPC_detach:
  case OMPC_inclusive:
  case OMPC_exclusive:
  case OMPC_uses_allocators:
  case OMPC_affinity:
  case OMPC_init:
  case OMPC_inbranch:
  case OMPC_notinbranch:
  case OMPC_link:
  case OMPC_indirect:
  case OMPC_use:
  case OMPC_novariants:
  case OMPC_nocontext:
  case OMPC_filter:
  case OMPC_when:
  case OMPC_adjust_args:
  case OMPC_append_args:
  case OMPC_memory_order:
  case OMPC_bind:
  case OMPC_align:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
  bool MemOrderingSpecified = false;
  if (S.getSingleClause<OMPSeqCstClause>()) {
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcqRelClause>()) {
    AO = llvm::AtomicOrdering::AcquireRelease;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcquireClause>()) {
    AO = llvm::AtomicOrdering::Acquire;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPReleaseClause>()) {
    AO = llvm::AtomicOrdering::Release;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPRelaxedClause>()) {
    AO = llvm::AtomicOrdering::Monotonic;
    MemOrderingSpecified = true;
  }
  llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find the first clause (skip the seq_cst|acq_rel|acquire|release|relaxed
    // clause, if it is first).
    OpenMPClauseKind K = C->getClauseKind();
    if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
        K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
      continue;
    Kind = K;
    KindsEncountered.insert(K);
  }
  bool IsCompareCapture = false;
  if (KindsEncountered.contains(OMPC_compare) &&
      KindsEncountered.contains(OMPC_capture)) {
    IsCompareCapture = true;
    Kind = OMPC_compare;
  }
  if (!MemOrderingSpecified) {
    llvm::AtomicOrdering DefaultOrder =
        CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
    if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
        DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
        (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
         Kind == OMPC_capture)) {
      AO = DefaultOrder;
    } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
      if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
        AO = llvm::AtomicOrdering::Release;
      } else if (Kind == OMPC_read) {
        assert(Kind == OMPC_read && "Unexpected atomic kind.");
        AO = llvm::AtomicOrdering::Acquire;
      }
    }
  }

  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S.getAssociatedStmt());
  emitOMPAtomicExpr(*this, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
                    S.getExpr(), S.getUpdateExpr(), S.getD(), S.getCondExpr(),
                    S.isXLHSInRHSPart(), IsCompareCapture, S.getBeginLoc());
}

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // On device emit this construct as inlined code.
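  // That is, when compiling for the device, the body of, e.g.,
  // '#pragma omp target' is emitted directly into the current function; only
  // the host-side path below outlines the region and emits the actual
  // offloading call (a summary of the code that follows, for orientation).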
  if (CGM.getLangOpts().OpenMPIsDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
        });
    return;
  }

  auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the (at most one) 'if' clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
      nullptr, OMPC_DEVICE_unknown);
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device.setPointerAndInt(C->getDevice(), C->getModifier());

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so, the target region is
  // not an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error,
        "No offloading entry generated while offloading is mandatory.");
    CGM.getDiags().Report(DiagID);
  }

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  auto &&SizeEmitter =
      [IsOffloadEntry](CodeGenFunction &CGF,
                       const OMPLoopDirective &D) -> llvm::Value * {
    if (IsOffloadEntry) {
      OMPLoopScope(CGF, D);
      // Emit calculation of the iterations count.
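      // The count is widened to 64 bits and forwarded to the runtime, which
      // may use it to size the launch of the offloaded loop (illustratively,
      // for 'for (int i = 0; i < N; ++i)' this is simply 'N').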
      llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
      NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
                                                /*isSigned=*/false);
      return NumIterations;
    }
    return nullptr;
  };
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        SizeEmitter);
}

static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
  CGF.EnsureInsertPoint();
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
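  // Sketch of what follows: the body is outlined with its clause variables
  // privatized inside it, and emitCommonOMPTeamsDirective (defined above)
  // launches the league of teams through the OpenMP runtime; e.g. for
  // '#pragma omp teams num_teams(4)' the num_teams value is forwarded via
  // emitNumTeamsClause.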
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
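  // For the combined '#pragma omp target teams distribute' the directive is
  // split here: the teams region below runs the 'distribute' loop (emitted
  // via the inlined-directive path), and each team works on its own chunk of
  // the iteration space (illustrative summary).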
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
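  // This call registers the outlined kernel as an offload entry so that the
  // device image contains code for it; the matching host-side emission is in
  // EmitOMPTargetTeamsDistributeSimdDirective below.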
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {

  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
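  // For this combined construct the distribute loop above uses
  // emitInnerParallelForWhenCombined, i.e. each chunk that 'distribute'
  // assigns to a team is handed to an inner 'parallel for' that shares it
  // among the team's threads (illustrative summary).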
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
                              CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  llvm::Value *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = EmitScalarExpr(C->getDevice());

  llvm::Value *NumDependences = nullptr;
  llvm::Value *DependenceAddress = nullptr;
  if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
    OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
                                           DC->getModifier());
    Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
    std::pair<llvm::Value *, Address> DependencePair =
        CGM.getOpenMPRuntime().emitDependClause(*this, Dependencies,
                                                DC->getBeginLoc());
    NumDependences = DependencePair.first;
    DependenceAddress = Builder.CreatePointerCast(
        DependencePair.second.getPointer(), CGM.Int8PtrTy);
  }

  assert(!(S.hasClausesOfKind<OMPNowaitClause>() &&
           !(S.getSingleClause<OMPInitClause>() ||
             S.getSingleClause<OMPDestroyClause>() ||
             S.getSingleClause<OMPUseClause>())) &&
         "OMPNowaitClause clause is used separately in OMPInteropDirective.");

  if (const auto *C = S.getSingleClause<OMPInitClause>()) {
    llvm::Value *InteropvarPtr =
        EmitLValue(C->getInteropVar()).getPointer(*this);
    llvm::omp::OMPInteropType InteropType = llvm::omp::OMPInteropType::Unknown;
    if (C->getIsTarget()) {
      InteropType = llvm::omp::OMPInteropType::Target;
    } else {
      assert(C->getIsTargetSync() && "Expected interop-type target/targetsync");
      InteropType = llvm::omp::OMPInteropType::TargetSync;
    }
    OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType, Device,
                                    NumDependences, DependenceAddress,
                                    S.hasClausesOfKind<OMPNowaitClause>());
  } else if (const auto *C = S.getSingleClause<OMPDestroyClause>()) {
    llvm::Value *InteropvarPtr =
        EmitLValue(C->getInteropVar()).getPointer(*this);
    OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device,
                                       NumDependences, DependenceAddress,
                                       S.hasClausesOfKind<OMPNowaitClause>());
  } else if (const auto *C = S.getSingleClause<OMPUseClause>()) {
    llvm::Value *InteropvarPtr =
        EmitLValue(C->getInteropVar()).getPointer(*this);
    OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device,
                                   NumDependences, DependenceAddress,
                                   S.hasClausesOfKind<OMPNowaitClause>());
  }
}

static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  // Emit SPMD target teams distribute parallel for region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  // Emit SPMD target teams distribute parallel for simd region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // TODO: This check is necessary as we only generate `omp parallel` through
    // the OpenMPIRBuilder for now.
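    // Consequently, only cancellation of parallel/sections/section regions is
    // routed through OMPBuilder.createCancel below; other cancel regions fall
    // through to the regular runtime call at the end of this function.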
    if (S.getCancelRegion() == OMPD_parallel ||
        S.getCancelRegion() == OMPD_sections ||
        S.getCancelRegion() == OMPD_section) {
      llvm::Value *IfCondition = nullptr;
      if (IfCond)
        IfCondition = EmitScalarExpr(IfCond,
                                     /*IgnoreResultAssign=*/true);
      return Builder.restoreIP(
          OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
    }
  }

  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
      Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}

void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (const Expr *PvtVarIt : C.private_copies()) {
    const auto *OrigVD =
        cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    // Initialize the temporary initialization variable with the address
    // we get from the runtime library. We have to cast the source address
    // because it is always a void *. References are materialized in the
    // privatization scope, so the initialization here disregards the fact
    // the original variable is a reference.
    llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
    Address InitAddr = Builder.CreateElementBitCast(InitAddrIt->second, Ty);
    setAddrOfLocalVar(InitVD, InitAddr);

    // Emit the private declaration; it will be initialized by the value of
    // the declaration we just added to the local declarations map.
    EmitDecl(*PvtVD);

    // The initialization variable has served its purpose in the emission
    // of the previous declaration, so we don't need it anymore.
    LocalDeclMap.erase(InitVD);

    // Return the address of the private variable.
    bool IsRegistered =
        PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(PvtVD));
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}

static const VarDecl *getBaseDecl(const Expr *Ref) {
  const Expr *Base = Ref->IgnoreParenImpCasts();
  while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
    Base = OASE->getBase()->IgnoreParenImpCasts();
  while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
    Base = ASE->getBase()->IgnoreParenImpCasts();
  return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
}

void CodeGenFunction::EmitOMPUseDeviceAddrClause(
    const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const Expr *Ref : C.varlists()) {
    const VarDecl *OrigVD = getBaseDecl(Ref);
    if (!Processed.insert(OrigVD).second)
      continue;
    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    Address PrivAddr = InitAddrIt->getSecond();
    // For declrefs and variable length arrays we need to load the pointer to
    // get the correct mapping, since the pointer to the data was passed to
    // the runtime.
    if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
        MatchingVD->getType()->isArrayType())
      PrivAddr =
          EmitLoadOfPointer(PrivAddr, getContext()
                                          .getPointerType(OrigVD->getType())
                                          ->castAs<PointerType>());
    llvm::Type *RealElTy =
        ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
    llvm::Type *RealTy = RealElTy->getPointerTo();
    PrivAddr =
        Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy, RealElTy);

    (void)PrivateScope.addPrivate(OrigVD, PrivAddr);
  }
}

// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
                                       /*SeparateBeginEndCalls=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code generation
  // to deactivate privatization.
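  // In other words, the runtime's data-mapping emission decides whether the
  // Enter callback below ever runs: if it does, use_device_ptr/use_device_addr
  // variables are privatized to the device addresses collected in
  // Info.CaptureDeviceAddrMap; otherwise the body is emitted without the
  // private scope (see PrivCodeGen below).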
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
          CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
                                         Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        OMPLexicalScope Scope(CGF, S, OMPD_unknown);
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inline scope, as changes to the references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
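  // (With IsOffloadEntry=true the outlined function is also registered as an
  // offload entry, which is how the host runtime locates the device code for
  // this directive at launch time; Fn and Addr receive, roughly, the emitted
  // function and the constant used as its entry ID.)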
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' and 'for simd'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

/// Map the helper variable of the given loop-bound expression to the address
/// of the corresponding captured implicit parameter.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  const auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(VDecl, CGF.GetAddrOfLocalVar(PVD));
}

void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  Address CapturedStruct = Address::invalid();
  {
    OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
    CapturedStruct = GenerateCapturedStmtArgument(*CS);
  }
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    (void)CGF.EmitOMPLinearClauseInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
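    // The taskloop runtime hands the computed chunk bounds to the outlined
    // function through trailing parameters of the captured declaration; the
    // enum below names their positions (starting at parameter 5, after the
    // fixed task parameters), and mapParam rebinds each helper variable to
    // the address of the matching parameter.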
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    {
      OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
      emitCommonSimdLoop(
          CGF, S,
          [&S](CodeGenFunction &CGF, PrePostActionTy &) {
            if (isOpenMPSimdDirective(S.getDirectiveKind()))
              CGF.EmitOMPSimdInit(S);
          },
          [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
            CGF.EmitOMPInnerLoop(
                S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                [&S](CodeGenFunction &CGF) {
                  emitOMPLoopBodyWithStopPoint(CGF, S,
                                               CodeGenFunction::JumpDest());
                },
                [](CodeGenFunction &) {});
          });
    }
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
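    // (The runtime sets the is-last-iteration flag only for the task that
    // executes the sequentially last iteration; loading it and testing for
    // non-null below guards the lastprivate copy-out, and the same test
    // conditions the final linear-clause update.)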
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getBeginLoc())));
    }
    CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
                               (*LIP)->getType(), S.getBeginLoc()));
    });
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
                      &Data](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getBeginLoc());
  }
}

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
    const OMPMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
    const OMPMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
    const OMPParallelMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
    const OMPParallelMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPGenericLoopDirective(
    const OMPGenericLoopDirective &S) {
  // Unimplemented; just inline the underlying statement for now.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
}

void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
    EmitOMPScanDirective(*SD);
    return;
  }
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    OMPPrivateScope GlobalsScope(CGF);
    if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
      // Capture global firstprivates to avoid a crash.
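      // (Globals have no entry in LocalDeclMap, so without registering an
      // address here the task-based codegen would have nothing to copy from;
      // e.g. a file-scope 'int g;' named in a firstprivate clause.)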
      for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
        for (const Expr *Ref : C->varlists()) {
          const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
          if (!DRE)
            continue;
          const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
          if (!VD || VD->hasLocalStorage())
            continue;
          if (!CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(Ref);
            GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF));
          }
        }
      }
    }
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      (void)GlobalsScope.Privatize();
      ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const Expr *E : LD->counters()) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(E);
            GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF));
          }
          if (isa<OMPCapturedExprDecl>(VD)) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
          if (!C->getNumForLoops())
            continue;
          for (unsigned I = LD->getLoopsNumber(),
                        E = C->getLoopNumIterations().size();
               I < E; ++I) {
            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
              // Emit only those that were not explicitly referenced in
              // clauses.
              if (!CGF.LocalDeclMap.count(VD))
                CGF.EmitVarDecl(*VD);
            }
          }
        }
      }
      (void)GlobalsScope.Privatize();
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  if (D.getDirectiveKind() == OMPD_atomic ||
      D.getDirectiveKind() == OMPD_critical ||
      D.getDirectiveKind() == OMPD_section ||
      D.getDirectiveKind() == OMPD_master ||
      D.getDirectiveKind() == OMPD_masked) {
    EmitStmt(D.getAssociatedStmt());
  } else {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
    OMPSimdLexicalScope Scope(*this, D);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        *this,
        isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                    : D.getDirectiveKind(),
        CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, D);
}