//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
          return CGF.EmitLValue(&DRE).getAddress(CGF);
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of used expressions from the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    CodeGenFunction::OMPMapVars PreCondVars;
    llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
    for (const auto *E : S.counters()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      EmittedAsPrivate.insert(VD->getCanonicalDecl());
      (void)PreCondVars.setVarAddr(
          CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
    }
    // Mark private vars as undefs.
    for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
      for (const Expr *IRef : C->varlists()) {
        const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
          (void)PreCondVars.setVarAddr(
              CGF, OrigVD,
              Address(llvm::UndefValue::get(
                          CGF.ConvertTypeForMem(CGF.getContext().getPointerType(
                              OrigVD->getType().getNonReferenceType()))),
                      CGF.getContext().getDeclAlign(OrigVD)));
        }
      }
    }
    (void)PreCondVars.apply(CGF);
    // Emit init, __range and __end variables for C++ range loops.
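    // For example, for a range-based for nested under 'collapse(2)', the
    // inner loop's init, __range and __end variables must be emitted here,
    // before the collapsed iteration space can be computed.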
    const Stmt *Body =
        S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
    for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) {
      Body = OMPLoopDirective::tryToFindNextInnerLoop(
          Body, /*TryImperfectlyNestedLoops=*/true);
      if (auto *For = dyn_cast<ForStmt>(Body)) {
        Body = For->getBody();
      } else {
        assert(isa<CXXForRangeStmt>(Body) &&
               "Expected canonical for loop or range-based for loop.");
        auto *CXXFor = cast<CXXForRangeStmt>(Body);
        if (const Stmt *Init = CXXFor->getInit())
          CGF.EmitStmt(Init);
        CGF.EmitStmt(CXXFor->getRangeStmt());
        CGF.EmitStmt(CXXFor->getEndStmt());
        Body = CXXFor->getBody();
      }
    }
    if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress(CGF);
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
                  : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
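        // E.g. a captured 'float' is stored into the uintptr_t-sized temporary
        // and reloaded through the uintptr_t lvalue, so its bit pattern can be
        // forwarded to the runtime as a pointer-sized integer.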
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress(CGF);
  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
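  // Parameters preceding the context parameter are passed through unchanged;
  // the context parameter itself is then expanded into one argument per
  // captured field.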
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. VLA type sizes are passed to the outlined
    // function in the same way.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit the function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Generate the function.
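  // StartFunction emits the standard prologue; the loop below then maps each
  // emitted argument back to the variable (or VLA size) it captures.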
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.S->getBeginLoc(), CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit the function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress(CGF);
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress(CGF)}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
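  // With full debug info, outline into a function that keeps the original
  // parameter types (named '<helper>_debug__') and emit a wrapper with the
  // runtime-expected uintptr parameters that simply forwards to it.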
  bool NeedWrapperFunction =
      getDebugInfo() &&
      CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str());
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
        return LocalAddrPair.second.second;
      });
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName());
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getBeginLoc(),
                                                  F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
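  // emitArrayLength returns the flattened element count and sets ElementTy to
  // the base element type, so multi-dimensional arrays are copied with a
  // single linear loop.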
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element-by-element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
            Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function (e.g. omp for, omp simd, omp distribute).
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit a copy for firstprivate constant variables captured by
      // reference in target regions.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this,
                                                                    OrigVD);
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
                const Expr *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform simple memcpy.
                  LValue Dest =
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
                } else {
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(),
                      OriginalLVal.getAddress(*this), Type,
                      [this, VDInit, Init](Address DestElement,
                                           Address SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for single element.
                        setAddrOfLocalVar(VDInit, SrcElement);
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer*/ false);
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VDInit, OriginalAddr, VD]() {
                // Emit private VarDecl with copy init.
                // Remap temp VDInit variable to the address of the original
                // variable (for proper handling of captured global variables).
                setAddrOfLocalVar(VDInit, OriginalAddr);
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If it
          // is, there is no need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of the copying procedure for the non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    // Taskloops do not require additional initialization; it is done in the
    // runtime support library.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                          /*RefersToEnclosingVariableOrCapture=*/
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress(*this);
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit an implicit barrier if at least one lastprivate conditional is
    // found and this is not simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const Expr *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
        return IsArray ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
      });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit a nowait reduction if the nowait clause is present or the directive
    // is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit a conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to the
/// outlined parallel function. This is necessary for combined constructs such
/// as 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace

static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    llvm::Value *NumThreads =
        CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                           /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute'
  // chunk's lower and upper bounds with the 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond);
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
    // Check if we have any if clause associated with the directive.
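    // On this path the clause expressions are evaluated eagerly and handed to
    // the OpenMPIRBuilder, which materializes the runtime calls itself.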
    llvm::Value *IfCond = nullptr;
    if (const auto *C = S.getSingleClause<OMPIfClause>())
      IfCond = EmitScalarExpr(C->getCondition(),
                              /*IgnoreResultAssign=*/true);

    llvm::Value *NumThreads = nullptr;
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                  /*IgnoreResultAssign=*/true);

    ProcBindKind ProcBind = OMP_PROC_BIND_default;
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
      ProcBind = ProcBindClause->getProcBindKind();

    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    // The cleanup callback that finalizes all variables at the given location,
    // thus calling destructors etc.
    auto FiniCB = [this](InsertPointTy IP) {
      CGBuilderTy::InsertPointGuard IPG(Builder);
      assert(IP.getBlock()->end() != IP.getPoint() &&
             "OpenMP IR Builder should cause terminated block!");
      llvm::BasicBlock *IPBB = IP.getBlock();
      llvm::BasicBlock *DestBB = IPBB->splitBasicBlock(IP.getPoint());
      IPBB->getTerminator()->eraseFromParent();
      Builder.SetInsertPoint(IPBB);
      CodeGenFunction::JumpDest Dest = getJumpDestInCurrentScope(DestBB);
      EmitBranchThroughCleanup(Dest);
    };

    // Privatization callback that performs the appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();

    auto BodyGenCB = [ParallelRegionBodyStmt,
                      this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                            llvm::BasicBlock &ContinuationBB) {
      auto OldAllocaIP = AllocaInsertPt;
      AllocaInsertPt = &*AllocaIP.getPoint();

      auto OldReturnBlock = ReturnBlock;
      ReturnBlock = getJumpDestInCurrentScope(&ContinuationBB);

      llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
      CodeGenIPBB->splitBasicBlock(CodeGenIP.getPoint());
      llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator();
      CodeGenIPBBTI->removeFromParent();

      Builder.SetInsertPoint(CodeGenIPBB);

      EmitStmt(ParallelRegionBodyStmt);

      Builder.Insert(CodeGenIPBBTI);

      AllocaInsertPt = OldAllocaIP;
      ReturnBlock = OldReturnBlock;
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB,
                                                 FiniCB, IfCond, NumThreads,
                                                 ProcBind, S.hasCancel()));
    return;
  }

  // Emit the parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize the threads and avoid data
      // races when propagating the master thread's values of threadprivate
      // variables to the local instances of those variables in all other
      // implicit threads.
1430 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1431 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1432 /*ForceSimpleCall=*/true);
1433 }
1434 CGF.EmitOMPPrivateClause(S, PrivateScope);
1435 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1436 (void)PrivateScope.Privatize();
1437 CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1438 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1439 };
1440 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1441 emitEmptyBoundParameters);
1442 emitPostUpdateForReductionClause(*this, S,
1443 [](CodeGenFunction &) { return nullptr; });
1444 }
1445
1446 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1447 int MaxLevel, int Level = 0) {
1448 assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1449 const Stmt *SimplifiedS = S->IgnoreContainers();
1450 if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
1451 PrettyStackTraceLoc CrashInfo(
1452 CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1453 "LLVM IR generation of compound statement ('{}')");
1454
1455 // Keep track of the current cleanup stack depth, including debug scopes.
1456 CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1457 for (const Stmt *CurStmt : CS->body())
1458 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1459 return;
1460 }
1461 if (SimplifiedS == NextLoop) {
1462 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1463 S = For->getBody();
1464 } else {
1465 assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1466 "Expected canonical for loop or range-based for loop.");
1467 const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
1468 CGF.EmitStmt(CXXFor->getLoopVarStmt());
1469 S = CXXFor->getBody();
1470 }
1471 if (Level + 1 < MaxLevel) {
1472 NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
1473 S, /*TryImperfectlyNestedLoops=*/true);
1474 emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
1475 return;
1476 }
1477 }
1478 CGF.EmitStmt(S);
1479 }
1480
1481 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1482 JumpDest LoopExit) {
1483 RunCleanupsScope BodyScope(*this);
1484 // Update the counter values for the current iteration.
1485 for (const Expr *UE : D.updates())
1486 EmitIgnoredExpr(UE);
1487 // Update the linear variables.
1488 // In distribute directives only loop counters may be marked as linear, so
1489 // there is no need to generate code for them.
1490 if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1491 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1492 for (const Expr *UE : C->updates())
1493 EmitIgnoredExpr(UE);
1494 }
1495 }
1496
1497 // On a continue in the body, jump to the end.
1498 JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
1499 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1500 for (const Expr *E : D.finals_conditions()) {
1501 if (!E)
1502 continue;
1503 // Check that the loop counter in a non-rectangular nest fits into the
1504 // iteration space.
1505 llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
1506 EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
1507 getProfileCount(D.getBody()));
1508 EmitBlock(NextBB);
1509 }
1510 // Emit loop variables for C++ range loops.
1511 const Stmt *Body =
1512 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
1513 // Emit loop body.
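  // Illustrative sketch: for
  //   #pragma omp for collapse(2)
  //   for (int i = 0; i < N; ++i)
  //     for (int j = 0; j < M; ++j)
  //       Body(i, j);
  // the call below skips both loop headers (their control flow is subsumed
  // by the collapsed iteration variable) and emits Body(i, j) together with
  // any imperfectly nested statements found between the two loops.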
1514 emitBody(*this, Body, 1515 OMPLoopDirective::tryToFindNextInnerLoop( 1516 Body, /*TryImperfectlyNestedLoops=*/true), 1517 D.getCollapsedNumber()); 1518 1519 // The end (updates/cleanups). 1520 EmitBlock(Continue.getBlock()); 1521 BreakContinueStack.pop_back(); 1522 } 1523 1524 void CodeGenFunction::EmitOMPInnerLoop( 1525 const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, 1526 const Expr *IncExpr, 1527 const llvm::function_ref<void(CodeGenFunction &)> BodyGen, 1528 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { 1529 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); 1530 1531 // Start the loop with a block that tests the condition. 1532 auto CondBlock = createBasicBlock("omp.inner.for.cond"); 1533 EmitBlock(CondBlock); 1534 const SourceRange R = S.getSourceRange(); 1535 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1536 SourceLocToDebugLoc(R.getEnd())); 1537 1538 // If there are any cleanups between here and the loop-exit scope, 1539 // create a block to stage a loop exit along. 1540 llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); 1541 if (RequiresCleanup) 1542 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); 1543 1544 llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body"); 1545 1546 // Emit condition. 1547 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); 1548 if (ExitBlock != LoopExit.getBlock()) { 1549 EmitBlock(ExitBlock); 1550 EmitBranchThroughCleanup(LoopExit); 1551 } 1552 1553 EmitBlock(LoopBody); 1554 incrementProfileCounter(&S); 1555 1556 // Create a block for the increment. 1557 JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); 1558 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1559 1560 BodyGen(*this); 1561 1562 // Emit "IV = IV + 1" and a back-edge to the condition block. 1563 EmitBlock(Continue.getBlock()); 1564 EmitIgnoredExpr(IncExpr); 1565 PostIncGen(*this); 1566 BreakContinueStack.pop_back(); 1567 EmitBranch(CondBlock); 1568 LoopStack.pop(); 1569 // Emit the fall-through block. 1570 EmitBlock(LoopExit.getBlock()); 1571 } 1572 1573 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { 1574 if (!HaveInsertPoint()) 1575 return false; 1576 // Emit inits for the linear variables. 1577 bool HasLinears = false; 1578 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1579 for (const Expr *Init : C->inits()) { 1580 HasLinears = true; 1581 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); 1582 if (const auto *Ref = 1583 dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { 1584 AutoVarEmission Emission = EmitAutoVarAlloca(*VD); 1585 const auto *OrigVD = cast<VarDecl>(Ref->getDecl()); 1586 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), 1587 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1588 VD->getInit()->getType(), VK_LValue, 1589 VD->getInit()->getExprLoc()); 1590 EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), 1591 VD->getType()), 1592 /*capturedByInit=*/false); 1593 EmitAutoVarCleanups(Emission); 1594 } else { 1595 EmitVarDecl(*VD); 1596 } 1597 } 1598 // Emit the linear steps for the linear clauses. 1599 // If a step is not constant, it is pre-calculated before the loop. 1600 if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep())) 1601 if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) { 1602 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); 1603 // Emit calculation of the linear step. 
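        // e.g. for a hypothetical '#pragma omp simd linear(X : A + B)', the
        // step 'A + B' is evaluated once here into the saved temporary
        // rather than being recomputed on every loop iteration.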
1604 EmitIgnoredExpr(CS); 1605 } 1606 } 1607 return HasLinears; 1608 } 1609 1610 void CodeGenFunction::EmitOMPLinearClauseFinal( 1611 const OMPLoopDirective &D, 1612 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { 1613 if (!HaveInsertPoint()) 1614 return; 1615 llvm::BasicBlock *DoneBB = nullptr; 1616 // Emit the final values of the linear variables. 1617 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1618 auto IC = C->varlist_begin(); 1619 for (const Expr *F : C->finals()) { 1620 if (!DoneBB) { 1621 if (llvm::Value *Cond = CondGen(*this)) { 1622 // If the first post-update expression is found, emit conditional 1623 // block if it was requested. 1624 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu"); 1625 DoneBB = createBasicBlock(".omp.linear.pu.done"); 1626 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1627 EmitBlock(ThenBB); 1628 } 1629 } 1630 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); 1631 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), 1632 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1633 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); 1634 Address OrigAddr = EmitLValue(&DRE).getAddress(*this); 1635 CodeGenFunction::OMPPrivateScope VarScope(*this); 1636 VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; }); 1637 (void)VarScope.Privatize(); 1638 EmitIgnoredExpr(F); 1639 ++IC; 1640 } 1641 if (const Expr *PostUpdate = C->getPostUpdateExpr()) 1642 EmitIgnoredExpr(PostUpdate); 1643 } 1644 if (DoneBB) 1645 EmitBlock(DoneBB, /*IsFinished=*/true); 1646 } 1647 1648 static void emitAlignedClause(CodeGenFunction &CGF, 1649 const OMPExecutableDirective &D) { 1650 if (!CGF.HaveInsertPoint()) 1651 return; 1652 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { 1653 llvm::APInt ClauseAlignment(64, 0); 1654 if (const Expr *AlignmentExpr = Clause->getAlignment()) { 1655 auto *AlignmentCI = 1656 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); 1657 ClauseAlignment = AlignmentCI->getValue(); 1658 } 1659 for (const Expr *E : Clause->varlists()) { 1660 llvm::APInt Alignment(ClauseAlignment); 1661 if (Alignment == 0) { 1662 // OpenMP [2.8.1, Description] 1663 // If no optional parameter is specified, implementation-defined default 1664 // alignments for SIMD instructions on the target platforms are assumed. 1665 Alignment = 1666 CGF.getContext() 1667 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( 1668 E->getType()->getPointeeType())) 1669 .getQuantity(); 1670 } 1671 assert((Alignment == 0 || Alignment.isPowerOf2()) && 1672 "alignment is not power of 2"); 1673 if (Alignment != 0) { 1674 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 1675 CGF.EmitAlignmentAssumption( 1676 PtrValue, E, /*No second loc needed*/ SourceLocation(), 1677 llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment)); 1678 } 1679 } 1680 } 1681 } 1682 1683 void CodeGenFunction::EmitOMPPrivateLoopCounters( 1684 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { 1685 if (!HaveInsertPoint()) 1686 return; 1687 auto I = S.private_counters().begin(); 1688 for (const Expr *E : S.counters()) { 1689 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1690 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 1691 // Emit var without initialization. 
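    // Sketch: for '#pragma omp for' over 'for (i = 0; i < N; ++i)', the
    // counter 'i' is implicitly private, so a fresh uninitialized copy is
    // allocated here; the original 'i' is only updated afterwards by the
    // lastprivate/final handling, if requested.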
1692 AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
1693 EmitAutoVarCleanups(VarEmission);
1694 LocalDeclMap.erase(PrivateVD);
1695 (void)LoopScope.addPrivate(VD, [&VarEmission]() {
1696 return VarEmission.getAllocatedAddress();
1697 });
1698 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
1699 VD->hasGlobalStorage()) {
1700 (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
1701 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
1702 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
1703 E->getType(), VK_LValue, E->getExprLoc());
1704 return EmitLValue(&DRE).getAddress(*this);
1705 });
1706 } else {
1707 (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
1708 return VarEmission.getAllocatedAddress();
1709 });
1710 }
1711 ++I;
1712 }
1713 // Privatize extra loop counters used in loops for ordered(n) clauses.
1714 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
1715 if (!C->getNumForLoops())
1716 continue;
1717 for (unsigned I = S.getCollapsedNumber(),
1718 E = C->getLoopNumIterations().size();
1719 I < E; ++I) {
1720 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
1721 const auto *VD = cast<VarDecl>(DRE->getDecl());
1722 // Override only those variables that are captured; this avoids
1723 // re-emitting the variables declared within the loops.
1724 if (DRE->refersToEnclosingVariableOrCapture()) {
1725 (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
1726 return CreateMemTemp(DRE->getType(), VD->getName());
1727 });
1728 }
1729 }
1730 }
1731 }
1732
1733 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1734 const Expr *Cond, llvm::BasicBlock *TrueBlock,
1735 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1736 if (!CGF.HaveInsertPoint())
1737 return;
1738 {
1739 CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1740 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1741 (void)PreCondScope.Privatize();
1742 // Get initial values of real counters.
1743 for (const Expr *I : S.inits()) {
1744 CGF.EmitIgnoredExpr(I);
1745 }
1746 }
1747 // Create temp loop control variables with their init values to support
1748 // non-rectangular loops.
1749 CodeGenFunction::OMPMapVars PreCondVars;
1750 for (const Expr *E : S.dependent_counters()) {
1751 if (!E)
1752 continue;
1753 assert(!E->getType().getNonReferenceType()->isRecordType() &&
1754 "dependent counter must not be an iterator.");
1755 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1756 Address CounterAddr =
1757 CGF.CreateMemTemp(VD->getType().getNonReferenceType());
1758 (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
1759 }
1760 (void)PreCondVars.apply(CGF);
1761 for (const Expr *E : S.dependent_inits()) {
1762 if (!E)
1763 continue;
1764 CGF.EmitIgnoredExpr(E);
1765 }
1766 // Check that the loop is executed at least once.
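  // Sketch: in a non-rectangular nest such as
  //   for (int i = 0; i < N; ++i)
  //     for (int j = i; j < M; ++j)
  //       Body(i, j);
  // the inner bounds depend on 'i', so the precondition below is evaluated
  // on the dedicated counter temporaries created above, leaving the real
  // (possibly privatized) loop variables untouched.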
1767 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1768 PreCondVars.restore(CGF);
1769 }
1770
1771 void CodeGenFunction::EmitOMPLinearClause(
1772 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1773 if (!HaveInsertPoint())
1774 return;
1775 llvm::DenseSet<const VarDecl *> SIMDLCVs;
1776 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1777 const auto *LoopDirective = cast<OMPLoopDirective>(&D);
1778 for (const Expr *C : LoopDirective->counters()) {
1779 SIMDLCVs.insert(
1780 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1781 }
1782 }
1783 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1784 auto CurPrivate = C->privates().begin();
1785 for (const Expr *E : C->varlists()) {
1786 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1787 const auto *PrivateVD =
1788 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1789 if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1790 bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
1791 // Emit private VarDecl with copy init.
1792 EmitVarDecl(*PrivateVD);
1793 return GetAddrOfLocalVar(PrivateVD);
1794 });
1795 assert(IsRegistered && "linear var already registered as private");
1796 // Silence the warning about unused variable.
1797 (void)IsRegistered;
1798 } else {
1799 EmitVarDecl(*PrivateVD);
1800 }
1801 ++CurPrivate;
1802 }
1803 }
1804 }
1805
1806 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1807 const OMPExecutableDirective &D,
1808 bool IsMonotonic) {
1809 if (!CGF.HaveInsertPoint())
1810 return;
1811 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1812 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1813 /*ignoreResult=*/true);
1814 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1815 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1816 // In the presence of a finite 'safelen', it may be unsafe to mark all
1817 // the memory instructions parallel, because loop-carried
1818 // dependences beyond 'safelen' iterations are possible.
1819 if (!IsMonotonic)
1820 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1821 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1822 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1823 /*ignoreResult=*/true);
1824 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1825 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1826 // In the presence of a finite 'safelen', it may be unsafe to mark all
1827 // the memory instructions parallel, because loop-carried
1828 // dependences beyond 'safelen' iterations are possible.
1829 CGF.LoopStack.setParallel(/*Enable=*/false);
1830 }
1831 }
1832
1833 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1834 bool IsMonotonic) {
1835 // Walk the clauses and process the simdlen/safelen settings.
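  // Illustrative lowering sketch: 'simdlen(8)' becomes an
  // 'llvm.loop.vectorize.width' of 8 on the emitted loop, while a finite
  // 'safelen' additionally prevents the loop from being marked parallel,
  // since only 'safelen'-sized chunks are guaranteed dependence-free.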
1836 LoopStack.setParallel(!IsMonotonic); 1837 LoopStack.setVectorizeEnable(); 1838 emitSimdlenSafelenClause(*this, D, IsMonotonic); 1839 } 1840 1841 void CodeGenFunction::EmitOMPSimdFinal( 1842 const OMPLoopDirective &D, 1843 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { 1844 if (!HaveInsertPoint()) 1845 return; 1846 llvm::BasicBlock *DoneBB = nullptr; 1847 auto IC = D.counters().begin(); 1848 auto IPC = D.private_counters().begin(); 1849 for (const Expr *F : D.finals()) { 1850 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); 1851 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl()); 1852 const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD); 1853 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) || 1854 OrigVD->hasGlobalStorage() || CED) { 1855 if (!DoneBB) { 1856 if (llvm::Value *Cond = CondGen(*this)) { 1857 // If the first post-update expression is found, emit conditional 1858 // block if it was requested. 1859 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then"); 1860 DoneBB = createBasicBlock(".omp.final.done"); 1861 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1862 EmitBlock(ThenBB); 1863 } 1864 } 1865 Address OrigAddr = Address::invalid(); 1866 if (CED) { 1867 OrigAddr = 1868 EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this); 1869 } else { 1870 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD), 1871 /*RefersToEnclosingVariableOrCapture=*/false, 1872 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); 1873 OrigAddr = EmitLValue(&DRE).getAddress(*this); 1874 } 1875 OMPPrivateScope VarScope(*this); 1876 VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; }); 1877 (void)VarScope.Privatize(); 1878 EmitIgnoredExpr(F); 1879 } 1880 ++IC; 1881 ++IPC; 1882 } 1883 if (DoneBB) 1884 EmitBlock(DoneBB, /*IsFinished=*/true); 1885 } 1886 1887 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, 1888 const OMPLoopDirective &S, 1889 CodeGenFunction::JumpDest LoopExit) { 1890 CGF.EmitOMPLoopBody(S, LoopExit); 1891 CGF.EmitStopPoint(&S); 1892 } 1893 1894 /// Emit a helper variable and return corresponding lvalue. 
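/// (For the worksharing lowering these helpers are the lower/upper bound,
/// stride, and is-last-iteration temporaries that the runtime init calls
/// operate on.)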
1895 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 1896 const DeclRefExpr *Helper) { 1897 auto VDecl = cast<VarDecl>(Helper->getDecl()); 1898 CGF.EmitVarDecl(*VDecl); 1899 return CGF.EmitLValue(Helper); 1900 } 1901 1902 static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S, 1903 const RegionCodeGenTy &SimdInitGen, 1904 const RegionCodeGenTy &BodyCodeGen) { 1905 auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF, 1906 PrePostActionTy &) { 1907 CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S); 1908 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 1909 SimdInitGen(CGF); 1910 1911 BodyCodeGen(CGF); 1912 }; 1913 auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 1914 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 1915 CGF.LoopStack.setVectorizeEnable(/*Enable=*/false); 1916 1917 BodyCodeGen(CGF); 1918 }; 1919 const Expr *IfCond = nullptr; 1920 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 1921 if (CGF.getLangOpts().OpenMP >= 50 && 1922 (C->getNameModifier() == OMPD_unknown || 1923 C->getNameModifier() == OMPD_simd)) { 1924 IfCond = C->getCondition(); 1925 break; 1926 } 1927 } 1928 if (IfCond) { 1929 CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen); 1930 } else { 1931 RegionCodeGenTy ThenRCG(ThenGen); 1932 ThenRCG(CGF); 1933 } 1934 } 1935 1936 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, 1937 PrePostActionTy &Action) { 1938 Action.Enter(CGF); 1939 assert(isOpenMPSimdDirective(S.getDirectiveKind()) && 1940 "Expected simd directive"); 1941 OMPLoopScope PreInitScope(CGF, S); 1942 // if (PreCond) { 1943 // for (IV in 0..LastIteration) BODY; 1944 // <Final counter/linear vars updates>; 1945 // } 1946 // 1947 if (isOpenMPDistributeDirective(S.getDirectiveKind()) || 1948 isOpenMPWorksharingDirective(S.getDirectiveKind()) || 1949 isOpenMPTaskLoopDirective(S.getDirectiveKind())) { 1950 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable())); 1951 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable())); 1952 } 1953 1954 // Emit: if (PreCond) - begin. 1955 // If the condition constant folds and can be elided, avoid emitting the 1956 // whole loop. 1957 bool CondConstant; 1958 llvm::BasicBlock *ContBlock = nullptr; 1959 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1960 if (!CondConstant) 1961 return; 1962 } else { 1963 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then"); 1964 ContBlock = CGF.createBasicBlock("simd.if.end"); 1965 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 1966 CGF.getProfileCount(&S)); 1967 CGF.EmitBlock(ThenBlock); 1968 CGF.incrementProfileCounter(&S); 1969 } 1970 1971 // Emit the loop iteration variable. 1972 const Expr *IVExpr = S.getIterationVariable(); 1973 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 1974 CGF.EmitVarDecl(*IVDecl); 1975 CGF.EmitIgnoredExpr(S.getInit()); 1976 1977 // Emit the iterations count variable. 1978 // If it is not a variable, Sema decided to calculate iterations count on 1979 // each iteration (e.g., it is foldable into a constant). 1980 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1981 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1982 // Emit calculation of the iterations count. 
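  // A rough example: for 'for (i = 0; i < N; i += 2)' the count is about
  // (N + 1) / 2 for positive N; when it cannot be folded to a constant,
  // the expression emitted below stores it into the variable declared
  // above.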
1983 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 1984 } 1985 1986 emitAlignedClause(CGF, S); 1987 (void)CGF.EmitOMPLinearClauseInit(S); 1988 { 1989 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 1990 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 1991 CGF.EmitOMPLinearClause(S, LoopScope); 1992 CGF.EmitOMPPrivateClause(S, LoopScope); 1993 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1994 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( 1995 CGF, S, CGF.EmitLValue(S.getIterationVariable())); 1996 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1997 (void)LoopScope.Privatize(); 1998 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 1999 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 2000 2001 emitCommonSimdLoop( 2002 CGF, S, 2003 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2004 CGF.EmitOMPSimdInit(S); 2005 }, 2006 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 2007 CGF.EmitOMPInnerLoop( 2008 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), 2009 [&S](CodeGenFunction &CGF) { 2010 CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); 2011 CGF.EmitStopPoint(&S); 2012 }, 2013 [](CodeGenFunction &) {}); 2014 }); 2015 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; }); 2016 // Emit final copy of the lastprivate variables at the end of loops. 2017 if (HasLastprivateClause) 2018 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 2019 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); 2020 emitPostUpdateForReductionClause(CGF, S, 2021 [](CodeGenFunction &) { return nullptr; }); 2022 } 2023 CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; }); 2024 // Emit: if (PreCond) - end. 2025 if (ContBlock) { 2026 CGF.EmitBranch(ContBlock); 2027 CGF.EmitBlock(ContBlock, true); 2028 } 2029 } 2030 2031 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 2032 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2033 emitOMPSimdRegion(CGF, S, Action); 2034 }; 2035 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2036 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2037 } 2038 2039 void CodeGenFunction::EmitOMPOuterLoop( 2040 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 2041 CodeGenFunction::OMPPrivateScope &LoopScope, 2042 const CodeGenFunction::OMPLoopArguments &LoopArgs, 2043 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 2044 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { 2045 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2046 2047 const Expr *IVExpr = S.getIterationVariable(); 2048 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2049 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2050 2051 JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 2052 2053 // Start the loop with a block that tests the condition. 2054 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond"); 2055 EmitBlock(CondBlock); 2056 const SourceRange R = S.getSourceRange(); 2057 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 2058 SourceLocToDebugLoc(R.getEnd())); 2059 2060 llvm::Value *BoolCondVal = nullptr; 2061 if (!DynamicOrOrdered) { 2062 // UB = min(UB, GlobalUB) or 2063 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 
2064 // 'distribute parallel for') 2065 EmitIgnoredExpr(LoopArgs.EUB); 2066 // IV = LB 2067 EmitIgnoredExpr(LoopArgs.Init); 2068 // IV < UB 2069 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); 2070 } else { 2071 BoolCondVal = 2072 RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL, 2073 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); 2074 } 2075 2076 // If there are any cleanups between here and the loop-exit scope, 2077 // create a block to stage a loop exit along. 2078 llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); 2079 if (LoopScope.requiresCleanups()) 2080 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 2081 2082 llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body"); 2083 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 2084 if (ExitBlock != LoopExit.getBlock()) { 2085 EmitBlock(ExitBlock); 2086 EmitBranchThroughCleanup(LoopExit); 2087 } 2088 EmitBlock(LoopBody); 2089 2090 // Emit "IV = LB" (in case of static schedule, we have already calculated new 2091 // LB for loop condition and emitted it above). 2092 if (DynamicOrOrdered) 2093 EmitIgnoredExpr(LoopArgs.Init); 2094 2095 // Create a block for the increment. 2096 JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 2097 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 2098 2099 emitCommonSimdLoop( 2100 *this, S, 2101 [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { 2102 // Generate !llvm.loop.parallel metadata for loads and stores for loops 2103 // with dynamic/guided scheduling and without ordered clause. 2104 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 2105 CGF.LoopStack.setParallel(!IsMonotonic); 2106 else 2107 CGF.EmitOMPSimdInit(S, IsMonotonic); 2108 }, 2109 [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, 2110 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 2111 SourceLocation Loc = S.getBeginLoc(); 2112 // when 'distribute' is not combined with a 'for': 2113 // while (idx <= UB) { BODY; ++idx; } 2114 // when 'distribute' is combined with a 'for' 2115 // (e.g. 'distribute parallel for') 2116 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 2117 CGF.EmitOMPInnerLoop( 2118 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, 2119 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 2120 CodeGenLoop(CGF, S, LoopExit); 2121 }, 2122 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { 2123 CodeGenOrdered(CGF, Loc, IVSize, IVSigned); 2124 }); 2125 }); 2126 2127 EmitBlock(Continue.getBlock()); 2128 BreakContinueStack.pop_back(); 2129 if (!DynamicOrOrdered) { 2130 // Emit "LB = LB + Stride", "UB = UB + Stride". 2131 EmitIgnoredExpr(LoopArgs.NextLB); 2132 EmitIgnoredExpr(LoopArgs.NextUB); 2133 } 2134 2135 EmitBranch(CondBlock); 2136 LoopStack.pop(); 2137 // Emit the fall-through block. 2138 EmitBlock(LoopExit.getBlock()); 2139 2140 // Tell the runtime we are done. 
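  // Illustrative sketch: for static schedules this becomes a call such as
  //   __kmpc_for_static_fini(&loc, global_tid);
  // dynamic and guided schedules need no epilogue here because
  // __kmpc_dispatch_next() has already reported that no chunks remain.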
2141 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { 2142 if (!DynamicOrOrdered) 2143 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 2144 S.getDirectiveKind()); 2145 }; 2146 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2147 } 2148 2149 void CodeGenFunction::EmitOMPForOuterLoop( 2150 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 2151 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 2152 const OMPLoopArguments &LoopArgs, 2153 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 2154 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2155 2156 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 2157 const bool DynamicOrOrdered = 2158 Ordered || RT.isDynamic(ScheduleKind.Schedule); 2159 2160 assert((Ordered || 2161 !RT.isStaticNonchunked(ScheduleKind.Schedule, 2162 LoopArgs.Chunk != nullptr)) && 2163 "static non-chunked schedule does not need outer loop"); 2164 2165 // Emit outer loop. 2166 // 2167 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2168 // When schedule(dynamic,chunk_size) is specified, the iterations are 2169 // distributed to threads in the team in chunks as the threads request them. 2170 // Each thread executes a chunk of iterations, then requests another chunk, 2171 // until no chunks remain to be distributed. Each chunk contains chunk_size 2172 // iterations, except for the last chunk to be distributed, which may have 2173 // fewer iterations. When no chunk_size is specified, it defaults to 1. 2174 // 2175 // When schedule(guided,chunk_size) is specified, the iterations are assigned 2176 // to threads in the team in chunks as the executing threads request them. 2177 // Each thread executes a chunk of iterations, then requests another chunk, 2178 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 2179 // each chunk is proportional to the number of unassigned iterations divided 2180 // by the number of threads in the team, decreasing to 1. For a chunk_size 2181 // with value k (greater than 1), the size of each chunk is determined in the 2182 // same way, with the restriction that the chunks do not contain fewer than k 2183 // iterations (except for the last chunk to be assigned, which may have fewer 2184 // than k iterations). 2185 // 2186 // When schedule(auto) is specified, the decision regarding scheduling is 2187 // delegated to the compiler and/or runtime system. The programmer gives the 2188 // implementation the freedom to choose any possible mapping of iterations to 2189 // threads in the team. 2190 // 2191 // When schedule(runtime) is specified, the decision regarding scheduling is 2192 // deferred until run time, and the schedule and chunk size are taken from the 2193 // run-sched-var ICV. If the ICV is set to auto, the schedule is 2194 // implementation defined 2195 // 2196 // while(__kmpc_dispatch_next(&LB, &UB)) { 2197 // idx = LB; 2198 // while (idx <= UB) { BODY; ++idx; 2199 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. 2200 // } // inner loop 2201 // } 2202 // 2203 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2204 // When schedule(static, chunk_size) is specified, iterations are divided into 2205 // chunks of size chunk_size, and the chunks are assigned to the threads in 2206 // the team in a round-robin fashion in the order of the thread number. 
2207 //
2208 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2209 // while (idx <= UB) { BODY; ++idx; } // inner loop
2210 // LB = LB + ST;
2211 // UB = UB + ST;
2212 // }
2213 //
2214
2215 const Expr *IVExpr = S.getIterationVariable();
2216 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2217 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2218
2219 if (DynamicOrOrdered) {
2220 const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
2221 CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
2222 llvm::Value *LBVal = DispatchBounds.first;
2223 llvm::Value *UBVal = DispatchBounds.second;
2224 CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
2225 LoopArgs.Chunk};
2226 RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
2227 IVSigned, Ordered, DispatchRTInputValues);
2228 } else {
2229 CGOpenMPRuntime::StaticRTInput StaticInit(
2230 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
2231 LoopArgs.ST, LoopArgs.Chunk);
2232 RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
2233 ScheduleKind, StaticInit);
2234 }
2235
2236 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
2237 const unsigned IVSize,
2238 const bool IVSigned) {
2239 if (Ordered) {
2240 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
2241 IVSigned);
2242 }
2243 };
2244
2245 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
2246 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
2247 OuterLoopArgs.IncExpr = S.getInc();
2248 OuterLoopArgs.Init = S.getInit();
2249 OuterLoopArgs.Cond = S.getCond();
2250 OuterLoopArgs.NextLB = S.getNextLowerBound();
2251 OuterLoopArgs.NextUB = S.getNextUpperBound();
2252 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
2253 emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
2254 }
2255
2256 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
2257 const unsigned IVSize, const bool IVSigned) {}
2258
2259 void CodeGenFunction::EmitOMPDistributeOuterLoop(
2260 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
2261 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
2262 const CodeGenLoopTy &CodeGenLoopContent) {
2263
2264 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2265
2266 // Emit outer loop.
2267 // Same behavior as EmitOMPForOuterLoop, except that the schedule cannot be
2268 // dynamic.
2269 //
2270
2271 const Expr *IVExpr = S.getIterationVariable();
2272 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2273 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2274
2275 CGOpenMPRuntime::StaticRTInput StaticInit(
2276 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
2277 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
2278 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
2279
2280 // For combined 'distribute' and 'for', the increment expression of the
2281 // distribute loop is stored in DistInc. For 'distribute' alone, it is in Inc.
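  // Sketch: in 'distribute parallel for' the distribute loop advances its
  // iteration variable by the chunk stride (roughly IV += ST), whereas a
  // standalone 'distribute' advances it one iteration at a time (IV += 1).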
2282 Expr *IncExpr; 2283 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) 2284 IncExpr = S.getDistInc(); 2285 else 2286 IncExpr = S.getInc(); 2287 2288 // this routine is shared by 'omp distribute parallel for' and 2289 // 'omp distribute': select the right EUB expression depending on the 2290 // directive 2291 OMPLoopArguments OuterLoopArgs; 2292 OuterLoopArgs.LB = LoopArgs.LB; 2293 OuterLoopArgs.UB = LoopArgs.UB; 2294 OuterLoopArgs.ST = LoopArgs.ST; 2295 OuterLoopArgs.IL = LoopArgs.IL; 2296 OuterLoopArgs.Chunk = LoopArgs.Chunk; 2297 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2298 ? S.getCombinedEnsureUpperBound() 2299 : S.getEnsureUpperBound(); 2300 OuterLoopArgs.IncExpr = IncExpr; 2301 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2302 ? S.getCombinedInit() 2303 : S.getInit(); 2304 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2305 ? S.getCombinedCond() 2306 : S.getCond(); 2307 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2308 ? S.getCombinedNextLowerBound() 2309 : S.getNextLowerBound(); 2310 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2311 ? S.getCombinedNextUpperBound() 2312 : S.getNextUpperBound(); 2313 2314 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, 2315 LoopScope, OuterLoopArgs, CodeGenLoopContent, 2316 emitEmptyOrdered); 2317 } 2318 2319 static std::pair<LValue, LValue> 2320 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, 2321 const OMPExecutableDirective &S) { 2322 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2323 LValue LB = 2324 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2325 LValue UB = 2326 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2327 2328 // When composing 'distribute' with 'for' (e.g. as in 'distribute 2329 // parallel for') we need to use the 'distribute' 2330 // chunk lower and upper bounds rather than the whole loop iteration 2331 // space. These are parameters to the outlined function for 'parallel' 2332 // and we copy the bounds of the previous schedule into the 2333 // the current ones. 2334 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable()); 2335 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable()); 2336 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar( 2337 PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc()); 2338 PrevLBVal = CGF.EmitScalarConversion( 2339 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(), 2340 LS.getIterationVariable()->getType(), 2341 LS.getPrevLowerBoundVariable()->getExprLoc()); 2342 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar( 2343 PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc()); 2344 PrevUBVal = CGF.EmitScalarConversion( 2345 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(), 2346 LS.getIterationVariable()->getType(), 2347 LS.getPrevUpperBoundVariable()->getExprLoc()); 2348 2349 CGF.EmitStoreOfScalar(PrevLBVal, LB); 2350 CGF.EmitStoreOfScalar(PrevUBVal, UB); 2351 2352 return {LB, UB}; 2353 } 2354 2355 /// if the 'for' loop has a dispatch schedule (e.g. 
dynamic, guided) then 2356 /// we need to use the LB and UB expressions generated by the worksharing 2357 /// code generation support, whereas in non combined situations we would 2358 /// just emit 0 and the LastIteration expression 2359 /// This function is necessary due to the difference of the LB and UB 2360 /// types for the RT emission routines for 'for_static_init' and 2361 /// 'for_dispatch_init' 2362 static std::pair<llvm::Value *, llvm::Value *> 2363 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, 2364 const OMPExecutableDirective &S, 2365 Address LB, Address UB) { 2366 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2367 const Expr *IVExpr = LS.getIterationVariable(); 2368 // when implementing a dynamic schedule for a 'for' combined with a 2369 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop 2370 // is not normalized as each team only executes its own assigned 2371 // distribute chunk 2372 QualType IteratorTy = IVExpr->getType(); 2373 llvm::Value *LBVal = 2374 CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc()); 2375 llvm::Value *UBVal = 2376 CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc()); 2377 return {LBVal, UBVal}; 2378 } 2379 2380 static void emitDistributeParallelForDistributeInnerBoundParams( 2381 CodeGenFunction &CGF, const OMPExecutableDirective &S, 2382 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { 2383 const auto &Dir = cast<OMPLoopDirective>(S); 2384 LValue LB = 2385 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); 2386 llvm::Value *LBCast = 2387 CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)), 2388 CGF.SizeTy, /*isSigned=*/false); 2389 CapturedVars.push_back(LBCast); 2390 LValue UB = 2391 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); 2392 2393 llvm::Value *UBCast = 2394 CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)), 2395 CGF.SizeTy, /*isSigned=*/false); 2396 CapturedVars.push_back(UBCast); 2397 } 2398 2399 static void 2400 emitInnerParallelForWhenCombined(CodeGenFunction &CGF, 2401 const OMPLoopDirective &S, 2402 CodeGenFunction::JumpDest LoopExit) { 2403 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, 2404 PrePostActionTy &Action) { 2405 Action.Enter(CGF); 2406 bool HasCancel = false; 2407 if (!isOpenMPSimdDirective(S.getDirectiveKind())) { 2408 if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S)) 2409 HasCancel = D->hasCancel(); 2410 else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S)) 2411 HasCancel = D->hasCancel(); 2412 else if (const auto *D = 2413 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S)) 2414 HasCancel = D->hasCancel(); 2415 } 2416 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), 2417 HasCancel); 2418 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), 2419 emitDistributeParallelForInnerBounds, 2420 emitDistributeParallelForDispatchBounds); 2421 }; 2422 2423 emitCommonOMPParallelDirective( 2424 CGF, S, 2425 isOpenMPSimdDirective(S.getDirectiveKind()) ? 
OMPD_for_simd : OMPD_for, 2426 CGInlinedWorksharingLoop, 2427 emitDistributeParallelForDistributeInnerBoundParams); 2428 } 2429 2430 void CodeGenFunction::EmitOMPDistributeParallelForDirective( 2431 const OMPDistributeParallelForDirective &S) { 2432 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2433 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 2434 S.getDistInc()); 2435 }; 2436 OMPLexicalScope Scope(*this, S, OMPD_parallel); 2437 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); 2438 } 2439 2440 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( 2441 const OMPDistributeParallelForSimdDirective &S) { 2442 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2443 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 2444 S.getDistInc()); 2445 }; 2446 OMPLexicalScope Scope(*this, S, OMPD_parallel); 2447 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); 2448 } 2449 2450 void CodeGenFunction::EmitOMPDistributeSimdDirective( 2451 const OMPDistributeSimdDirective &S) { 2452 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2453 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 2454 }; 2455 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2456 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2457 } 2458 2459 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 2460 CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) { 2461 // Emit SPMD target parallel for region as a standalone region. 2462 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2463 emitOMPSimdRegion(CGF, S, Action); 2464 }; 2465 llvm::Function *Fn; 2466 llvm::Constant *Addr; 2467 // Emit target region as a standalone region. 2468 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 2469 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 2470 assert(Fn && Addr && "Target device function emission failed."); 2471 } 2472 2473 void CodeGenFunction::EmitOMPTargetSimdDirective( 2474 const OMPTargetSimdDirective &S) { 2475 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2476 emitOMPSimdRegion(CGF, S, Action); 2477 }; 2478 emitCommonOMPTargetDirective(*this, S, CodeGen); 2479 } 2480 2481 namespace { 2482 struct ScheduleKindModifiersTy { 2483 OpenMPScheduleClauseKind Kind; 2484 OpenMPScheduleClauseModifier M1; 2485 OpenMPScheduleClauseModifier M2; 2486 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, 2487 OpenMPScheduleClauseModifier M1, 2488 OpenMPScheduleClauseModifier M2) 2489 : Kind(Kind), M1(M1), M2(M2) {} 2490 }; 2491 } // namespace 2492 2493 bool CodeGenFunction::EmitOMPWorksharingLoop( 2494 const OMPLoopDirective &S, Expr *EUB, 2495 const CodeGenLoopBoundsTy &CodeGenLoopBounds, 2496 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 2497 // Emit the loop iteration variable. 2498 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2499 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2500 EmitVarDecl(*IVDecl); 2501 2502 // Emit the iterations count variable. 2503 // If it is not a variable, Sema decided to calculate iterations count on each 2504 // iteration (e.g., it is foldable into a constant). 2505 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2506 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2507 // Emit calculation of the iterations count. 
2508 EmitIgnoredExpr(S.getCalcLastIteration()); 2509 } 2510 2511 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2512 2513 bool HasLastprivateClause; 2514 // Check pre-condition. 2515 { 2516 OMPLoopScope PreInitScope(*this, S); 2517 // Skip the entire loop if we don't meet the precondition. 2518 // If the condition constant folds and can be elided, avoid emitting the 2519 // whole loop. 2520 bool CondConstant; 2521 llvm::BasicBlock *ContBlock = nullptr; 2522 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2523 if (!CondConstant) 2524 return false; 2525 } else { 2526 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then"); 2527 ContBlock = createBasicBlock("omp.precond.end"); 2528 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2529 getProfileCount(&S)); 2530 EmitBlock(ThenBlock); 2531 incrementProfileCounter(&S); 2532 } 2533 2534 RunCleanupsScope DoacrossCleanupScope(*this); 2535 bool Ordered = false; 2536 if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 2537 if (OrderedClause->getNumForLoops()) 2538 RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations()); 2539 else 2540 Ordered = true; 2541 } 2542 2543 llvm::DenseSet<const Expr *> EmittedFinals; 2544 emitAlignedClause(*this, S); 2545 bool HasLinears = EmitOMPLinearClauseInit(S); 2546 // Emit helper vars inits. 2547 2548 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); 2549 LValue LB = Bounds.first; 2550 LValue UB = Bounds.second; 2551 LValue ST = 2552 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2553 LValue IL = 2554 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2555 2556 // Emit 'then' code. 2557 { 2558 OMPPrivateScope LoopScope(*this); 2559 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) { 2560 // Emit implicit barrier to synchronize threads and avoid data races on 2561 // initialization of firstprivate variables and post-update of 2562 // lastprivate variables. 2563 CGM.getOpenMPRuntime().emitBarrierCall( 2564 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 2565 /*ForceSimpleCall=*/true); 2566 } 2567 EmitOMPPrivateClause(S, LoopScope); 2568 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( 2569 *this, S, EmitLValue(S.getIterationVariable())); 2570 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2571 EmitOMPReductionClauseInit(S, LoopScope); 2572 EmitOMPPrivateLoopCounters(S, LoopScope); 2573 EmitOMPLinearClause(S, LoopScope); 2574 (void)LoopScope.Privatize(); 2575 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 2576 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); 2577 2578 // Detect the loop schedule kind and chunk. 2579 const Expr *ChunkExpr = nullptr; 2580 OpenMPScheduleTy ScheduleKind; 2581 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { 2582 ScheduleKind.Schedule = C->getScheduleKind(); 2583 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2584 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2585 ChunkExpr = C->getChunkSize(); 2586 } else { 2587 // Default behaviour for schedule clause. 
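        // A sketch of the usual outcome: on the host this behaves as if
        // 'schedule(static)' with no chunk had been written; offloading
        // runtimes may override this hook with a device-specific default.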
2588 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk( 2589 *this, S, ScheduleKind.Schedule, ChunkExpr); 2590 } 2591 bool HasChunkSizeOne = false; 2592 llvm::Value *Chunk = nullptr; 2593 if (ChunkExpr) { 2594 Chunk = EmitScalarExpr(ChunkExpr); 2595 Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(), 2596 S.getIterationVariable()->getType(), 2597 S.getBeginLoc()); 2598 Expr::EvalResult Result; 2599 if (ChunkExpr->EvaluateAsInt(Result, getContext())) { 2600 llvm::APSInt EvaluatedChunk = Result.Val.getInt(); 2601 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1); 2602 } 2603 } 2604 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2605 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2606 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2607 // If the static schedule kind is specified or if the ordered clause is 2608 // specified, and if no monotonic modifier is specified, the effect will 2609 // be as if the monotonic modifier was specified. 2610 bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, 2611 /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && 2612 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); 2613 if ((RT.isStaticNonchunked(ScheduleKind.Schedule, 2614 /* Chunked */ Chunk != nullptr) || 2615 StaticChunkedOne) && 2616 !Ordered) { 2617 JumpDest LoopExit = 2618 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2619 emitCommonSimdLoop( 2620 *this, S, 2621 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2622 if (isOpenMPSimdDirective(S.getDirectiveKind())) 2623 CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2624 }, 2625 [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk, 2626 &S, ScheduleKind, LoopExit, 2627 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 2628 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2629 // When no chunk_size is specified, the iteration space is divided 2630 // into chunks that are approximately equal in size, and at most 2631 // one chunk is distributed to each thread. Note that the size of 2632 // the chunks is unspecified in this case. 2633 CGOpenMPRuntime::StaticRTInput StaticInit( 2634 IVSize, IVSigned, Ordered, IL.getAddress(CGF), 2635 LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF), 2636 StaticChunkedOne ? Chunk : nullptr); 2637 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 2638 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, 2639 StaticInit); 2640 // UB = min(UB, GlobalUB); 2641 if (!StaticChunkedOne) 2642 CGF.EmitIgnoredExpr(S.getEnsureUpperBound()); 2643 // IV = LB; 2644 CGF.EmitIgnoredExpr(S.getInit()); 2645 // For unchunked static schedule generate: 2646 // 2647 // while (idx <= UB) { 2648 // BODY; 2649 // ++idx; 2650 // } 2651 // 2652 // For static schedule with chunk one: 2653 // 2654 // while (IV <= PrevUB) { 2655 // BODY; 2656 // IV += ST; 2657 // } 2658 CGF.EmitOMPInnerLoop( 2659 S, LoopScope.requiresCleanups(), 2660 StaticChunkedOne ? S.getCombinedParForInDistCond() 2661 : S.getCond(), 2662 StaticChunkedOne ? S.getDistInc() : S.getInc(), 2663 [&S, LoopExit](CodeGenFunction &CGF) { 2664 CGF.EmitOMPLoopBody(S, LoopExit); 2665 CGF.EmitStopPoint(&S); 2666 }, 2667 [](CodeGenFunction &) {}); 2668 }); 2669 EmitBlock(LoopExit.getBlock()); 2670 // Tell the runtime we are done. 
2671 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2672 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 2673 S.getDirectiveKind()); 2674 }; 2675 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2676 } else { 2677 const bool IsMonotonic = 2678 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || 2679 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || 2680 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 2681 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 2682 // Emit the outer loop, which requests its work chunk [LB..UB] from 2683 // runtime and runs the inner loop to process it. 2684 const OMPLoopArguments LoopArguments( 2685 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), 2686 IL.getAddress(*this), Chunk, EUB); 2687 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 2688 LoopArguments, CGDispatchBounds); 2689 } 2690 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2691 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { 2692 return CGF.Builder.CreateIsNotNull( 2693 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 2694 }); 2695 } 2696 EmitOMPReductionClauseFinal( 2697 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 2698 ? /*Parallel and Simd*/ OMPD_parallel_for_simd 2699 : /*Parallel only*/ OMPD_parallel); 2700 // Emit post-update of the reduction variables if IsLastIter != 0. 2701 emitPostUpdateForReductionClause( 2702 *this, S, [IL, &S](CodeGenFunction &CGF) { 2703 return CGF.Builder.CreateIsNotNull( 2704 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 2705 }); 2706 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2707 if (HasLastprivateClause) 2708 EmitOMPLastprivateClauseFinal( 2709 S, isOpenMPSimdDirective(S.getDirectiveKind()), 2710 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); 2711 } 2712 EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) { 2713 return CGF.Builder.CreateIsNotNull( 2714 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 2715 }); 2716 DoacrossCleanupScope.ForceCleanup(); 2717 // We're now done with the loop, so jump to the continuation block. 2718 if (ContBlock) { 2719 EmitBranch(ContBlock); 2720 EmitBlock(ContBlock, /*IsFinished=*/true); 2721 } 2722 } 2723 return HasLastprivateClause; 2724 } 2725 2726 /// The following two functions generate expressions for the loop lower 2727 /// and upper bounds in case of static and dynamic (dispatch) schedule 2728 /// of the associated 'for' or 'distribute' loop. 2729 static std::pair<LValue, LValue> 2730 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 2731 const auto &LS = cast<OMPLoopDirective>(S); 2732 LValue LB = 2733 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2734 LValue UB = 2735 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2736 return {LB, UB}; 2737 } 2738 2739 /// When dealing with dispatch schedules (e.g. 
dynamic, guided) we do not 2740 /// consider the lower and upper bound expressions generated by the 2741 /// worksharing loop support, but we use 0 and the iteration space size as 2742 /// constants 2743 static std::pair<llvm::Value *, llvm::Value *> 2744 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, 2745 Address LB, Address UB) { 2746 const auto &LS = cast<OMPLoopDirective>(S); 2747 const Expr *IVExpr = LS.getIterationVariable(); 2748 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); 2749 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); 2750 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); 2751 return {LBVal, UBVal}; 2752 } 2753 2754 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 2755 bool HasLastprivates = false; 2756 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2757 PrePostActionTy &) { 2758 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 2759 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2760 emitForLoopBounds, 2761 emitDispatchForLoopBounds); 2762 }; 2763 { 2764 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2765 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 2766 S.hasCancel()); 2767 } 2768 2769 // Emit an implicit barrier at the end. 2770 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) 2771 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); 2772 } 2773 2774 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 2775 bool HasLastprivates = false; 2776 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2777 PrePostActionTy &) { 2778 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2779 emitForLoopBounds, 2780 emitDispatchForLoopBounds); 2781 }; 2782 { 2783 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2784 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2785 } 2786 2787 // Emit an implicit barrier at the end. 2788 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) 2789 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); 2790 } 2791 2792 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 2793 const Twine &Name, 2794 llvm::Value *Init = nullptr) { 2795 LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 2796 if (Init) 2797 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); 2798 return LVal; 2799 } 2800 2801 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 2802 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); 2803 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); 2804 bool HasLastprivates = false; 2805 auto &&CodeGen = [&S, CapturedStmt, CS, 2806 &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { 2807 ASTContext &C = CGF.getContext(); 2808 QualType KmpInt32Ty = 2809 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2810 // Emit helper vars inits. 2811 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 2812 CGF.Builder.getInt32(0)); 2813 llvm::ConstantInt *GlobalUBVal = CS != nullptr 2814 ? 
CGF.Builder.getInt32(CS->size() - 1) 2815 : CGF.Builder.getInt32(0); 2816 LValue UB = 2817 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 2818 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 2819 CGF.Builder.getInt32(1)); 2820 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 2821 CGF.Builder.getInt32(0)); 2822 // Loop counter. 2823 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 2824 OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 2825 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 2826 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 2827 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 2828 // Generate condition for loop. 2829 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 2830 OK_Ordinary, S.getBeginLoc(), FPOptions()); 2831 // Increment for loop counter. 2832 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 2833 S.getBeginLoc(), true); 2834 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { 2835 // Iterate through all sections and emit a switch construct: 2836 // switch (IV) { 2837 // case 0: 2838 // <SectionStmt[0]>; 2839 // break; 2840 // ... 2841 // case <NumSection> - 1: 2842 // <SectionStmt[<NumSection> - 1]>; 2843 // break; 2844 // } 2845 // .omp.sections.exit: 2846 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 2847 llvm::SwitchInst *SwitchStmt = 2848 CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()), 2849 ExitBB, CS == nullptr ? 1 : CS->size()); 2850 if (CS) { 2851 unsigned CaseNumber = 0; 2852 for (const Stmt *SubStmt : CS->children()) { 2853 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2854 CGF.EmitBlock(CaseBB); 2855 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 2856 CGF.EmitStmt(SubStmt); 2857 CGF.EmitBranch(ExitBB); 2858 ++CaseNumber; 2859 } 2860 } else { 2861 llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2862 CGF.EmitBlock(CaseBB); 2863 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 2864 CGF.EmitStmt(CapturedStmt); 2865 CGF.EmitBranch(ExitBB); 2866 } 2867 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 2868 }; 2869 2870 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2871 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 2872 // Emit implicit barrier to synchronize threads and avoid data races on 2873 // initialization of firstprivate variables and post-update of lastprivate 2874 // variables. 2875 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 2876 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 2877 /*ForceSimpleCall=*/true); 2878 } 2879 CGF.EmitOMPPrivateClause(S, LoopScope); 2880 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV); 2881 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2882 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2883 (void)LoopScope.Privatize(); 2884 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 2885 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 2886 2887 // Emit static non-chunked loop. 
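    // Illustrative sketch: '#pragma omp sections' containing K section
    // statements is lowered as a statically scheduled loop over 0..K-1
    // where iteration IV executes case IV of the switch built above.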
    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false,
        IL.getAddress(CGF), LB.getAddress(CGF), UB.getAddress(CGF),
        ST.getAddress(CGF));
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_unknown);
  }
}

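// For illustration: given
//   #pragma omp sections
//   {
//     #pragma omp section
//     foo();
//     #pragma omp section
//     bar();
//   }
// EmitSections (called by the directives below) produces a two-case switch
// over the section index, driven by the statically scheduled worksharing
// loop above.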
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_sections);
  }
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
                                              S.hasCancel());
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination> = <source> expressions).
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid data races on firstprivate
  // init, or if no 'nowait' clause was specified and no 'copyprivate' clause
  // is present).
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getBeginLoc(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
}

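// For illustration: on the standard libomp runtime, a 'single' region is
// guarded by __kmpc_single/__kmpc_end_single, and any 'copyprivate' data is
// broadcast from the executing thread via __kmpc_copyprivate.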
static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  emitMaster(*this, S);
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  const Expr *Hint = nullptr;
  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getBeginLoc(), Hint);
}

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for simd' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelMasterDirective(
    const OMPParallelMasterDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'master' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // propagation of the master thread's values of threadprivate variables
      // to the local instances of those variables in all other implicit
      // threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    emitMaster(CGF, S);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitSections(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTaskBasedDirective(
    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
    OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
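  // In the captured task entry, parameter 2 is the privates block, parameter
  // 3 the privates copy helper, and parameter 4 the kmp_task_t pointer;
  // PartId and TaskT above point at parameters 1 and 4, and parameters 2 and
  // 3 are referenced later through the PrivatesParam/CopyFnParam enum.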
  // Check if the task is final.
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    const Expr *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    const Expr *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >= 0 for untied tasks).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const Expr *Ref : C->varlists()) {
      Data.ReductionVars.emplace_back(Ref);
      Data.ReductionCopies.emplace_back(*IPriv);
      Data.ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getBeginLoc(), LHSs, RHSs, Data);
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (const Expr *IRef : C->varlists())
      Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
                    CapturedRegion](CodeGenFunction &CGF,
                                    PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
          CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
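      // The copy helper loaded above fills in, for each private variable, a
      // caller-provided out-pointer with the address of that variable's copy
      // inside the task's privates block; CallArgs below is the privates
      // block followed by one such out-pointer per variable.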
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (const Expr *E : Data.PrivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (const Expr *E : Data.LastprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : LastprivateDstsOrigs) {
        const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        Pair.second->getType(), VK_LValue,
                        Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress(CGF);
        });
      }
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    if (Data.Reductions) {
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
                             Data.ReductionOps);
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement =
            Address(CGF.EmitScalarConversion(
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                        CGF.getContext().getPointerType(
                            Data.ReductionCopies[Cnt]->getType()),
                        Data.ReductionCopies[Cnt]->getExprLoc()),
                    Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                         [Replacement]() { return Replacement; });
      }
    }
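    // getTaskReductionItem above resolves a shared variable to this thread's
    // per-task reduction storage (on libomp this is
    // __kmpc_task_reduction_get_th_data); the returned pointer is then cast
    // back from void* to the type of the private copy.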
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const Expr *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate
        // and privatized already during processing of the firstprivates.
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        llvm::Value *ReductionsPtr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
                                 TaskgroupDescriptors[Cnt]->getExprLoc());
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                InRedPrivs[Cnt]->getExprLoc()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
                              [Replacement]() { return Replacement; });
      }
    }
    (void)InRedScope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S, llvm::None,
                        !isOpenMPParallelDirective(S.getDirectiveKind()) &&
                            !isOpenMPSimdDirective(S.getDirectiveKind()));
  TaskGen(*this, OutlinedFn, Data);
}

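/// Creates an implicit firstprivate entry of the given type for a generated
/// task: the original and private ImplicitParamDecls, the DeclRefExprs that
/// tie them together, and the initializer, all registered in \p Data.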
static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                           ImplicitParamDecl::Other);
  auto *OrigRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                              ImplicitParamDecl::Other);
  auto *PrivateRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  QualType ElemType = C.getBaseElementType(Ty);
  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr,
                                           ElemType, ImplicitParamDecl::Other);
  auto *InitRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
  PrivateVD->setInitStyle(VarDecl::CInit);
  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
                                              InitRef, /*BasePath=*/nullptr,
                                              VK_RValue));
  Data.FirstprivateVars.emplace_back(OrigRef);
  Data.FirstprivateCopies.emplace_back(PrivateRef);
  Data.FirstprivateInits.emplace_back(InitRef);
  return OrigVD;
}

void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
    OMPTargetDataInfo &InputInfo) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  OMPTaskDataTy Data;
  // The task is not final.
  Data.Final.setInt(/*IntVal=*/false);
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      Data.FirstprivateVars.push_back(*IRef);
      Data.FirstprivateCopies.push_back(IInit);
      Data.FirstprivateInits.push_back(*IElemInitRef);
      ++IRef;
      ++IElemInitRef;
    }
  }
  OMPPrivateScope TargetScope(*this);
  VarDecl *BPVD = nullptr;
  VarDecl *PVD = nullptr;
  VarDecl *SVD = nullptr;
  if (InputInfo.NumberOfTargetItems > 0) {
    auto *CD = CapturedDecl::Create(
        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
    QualType BaseAndPointersType = getContext().getConstantArrayType(
        getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    BPVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
    PVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
    QualType SizesType = getContext().getConstantArrayType(
        getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
        ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
                                            S.getBeginLoc());
    TargetScope.addPrivate(
        BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
    TargetScope.addPrivate(PVD,
                           [&InputInfo]() { return InputInfo.PointersArray; });
    TargetScope.addPrivate(SVD,
                           [&InputInfo]() { return InputInfo.SizesArray; });
  }
  (void)TargetScope.Privatize();
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (const Expr *IRef : C->varlists())
      Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.FirstprivateVars.empty()) {
      llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
          CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    if (InputInfo.NumberOfTargetItems > 0) {
      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
    }

    Action.Enter(CGF);
    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
      Data.NumberOfParts);
  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
  IntegerLiteral IfCond(getContext(), TrueOrFalse,
                        getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
                        SourceLocation());

  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
                                      SharedsTy, CapturedStruct, &IfCond, Data);
}

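// For illustration: on the standard libomp runtime, a task directive is
// lowered to an allocation plus an enqueue, roughly (arguments abbreviated):
//   task = __kmpc_omp_task_alloc(&loc, tid, flags, sizeof_task,
//                                sizeof_shareds, &task_entry);
//   __kmpc_omp_task(&loc, tid, task);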
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        auto IPriv = C->privates().begin();
        auto IRed = C->reduction_ops().begin();
        auto ILHS = C->lhs_exprs().begin();
        auto IRHS = C->rhs_exprs().begin();
        for (const Expr *Ref : C->varlists()) {
          Data.ReductionVars.emplace_back(Ref);
          Data.ReductionCopies.emplace_back(*IPriv);
          Data.ReductionOps.emplace_back(*IRed);
          LHSs.emplace_back(*ILHS);
          RHSs.emplace_back(*IRHS);
          std::advance(IPriv, 1);
          std::advance(IRed, 1);
          std::advance(ILHS, 1);
          std::advance(IRHS, 1);
        }
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(
              CGF, S.getBeginLoc(), LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(
      *this,
      [&S]() -> ArrayRef<const Expr *> {
        if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
          return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                    FlushClause->varlist_end());
        return llvm::None;
      }(),
      S.getBeginLoc());
}

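/// Emits a 'distribute' loop: the loop iteration variable and bounds are
/// privatized, the dist_schedule is determined, and codegen then takes
/// either the static (chunked or non-chunked) fast path or the generic
/// outer loop that requests chunks from the runtime.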
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    // Emit 'then' code.
    {
      // Emit helper vars inits.

      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind()))
        EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const Expr *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getBeginLoc());
        }
      } else {
        // Default behaviour for dist_schedule clause.
        CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
            *this, S, ScheduleKind, Chunk);
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      bool StaticChunked =
          RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr) ||
          StaticChunked) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            StaticChunked ? Chunk : nullptr);
        RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                    StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        const Expr *Cond =
            isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                ? S.getCombinedCond()
                : S.getCond();

        if (StaticChunked)
          Cond = S.getCombinedDistCond();

        // For static unchunked schedules generate:
        //
        //  1. For distribute alone, codegen
        //    while (idx <= UB) {
        //      BODY;
        //      ++idx;
        //    }
        //
        //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
        //    while (idx <= UB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      idx += ST;
        //    }
        //
        // For static chunked schedules generate:
        //
        //  while (IV <= GlobalUB) {
        //    <CodeGen rest of pragma>(LB, UB);
        //    LB += ST;
        //    UB += ST;
        //    UB = min(UB, GlobalUB);
        //    IV = LB;
        //  }
        //
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind()))
                CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true);
            },
            [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
             StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(), Cond, IncExpr,
                  [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                    CodeGenLoop(CGF, S, LoopExit);
                  },
                  [&S, StaticChunked](CodeGenFunction &CGF) {
                    if (StaticChunked) {
                      CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
                      CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedInit());
                    }
                  });
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            IL.getAddress(*this), Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind())) {
        EmitOMPReductionClauseFinal(S, OMPD_simd);
        // Emit post-update of the reduction variables if IsLastIter != 0.
        emitPostUpdateForReductionClause(
            *this, S, [IL, &S](CodeGenFunction &CGF) {
              return CGF.Builder.CreateIsNotNull(
                  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
            });
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}

void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
  Fn->setDoesNotRecurse();
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.getAssociatedStmt() &&
           "No associated statement must be in ordered depend construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
}

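// The helpers below implement 'omp atomic': value-category conversions
// shared by the clause emitters, followed by one emitter per atomic clause
// kind (read, write, update, capture).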
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                                   DestType, Loc)
                        : CGF.EmitComplexToScalarConversion(
                              Val.getComplexVal(), SrcType, DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    llvm::Value *ScalarVal = CGF.EmitScalarConversion(
        Val.getScalarVal(), SrcType, DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  } else {
    CGF.EmitAtomicStore(RVal, LVal,
                        IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                 : llvm::AtomicOrdering::Monotonic,
                        LVal.isVolatile(), /*isInit=*/false);
  }
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

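// For illustration: 'omp atomic read' ("v = x;") below performs an atomic
// load of x followed by a plain store to v, e.g.
//   #pragma omp atomic read
//   v = x;  // -> tmp = atomic_load(&x); v = tmp; (+ flush if seq_cst)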
static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(
                         XLValue, Loc,
                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                  : llvm::AtomicOrdering::Monotonic,
                         XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
}

static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomics are supported for the given
  // type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress(CGF).getElementType())) ||
      !X.getAddress(CGF).getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress(CGF).getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  llvm::Value *Res =
      CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> CommonGen) {
  // Update expressions are allowed to have the following forms:
  //   x binop= expr;    -> xrval binop expr;
  //   x++, ++x          -> xrval + 1;
  //   x--, --x          -> xrval - 1;
  //   x = x binop expr; -> xrval binop expr;
  //   x = expr Op x;    -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

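// For illustration: an update like 'x += 1' on a 32-bit int becomes a single
//   atomicrmw add i32* %x, i32 1 monotonic
// whereas forms rejected by emitOMPAtomicRMW above (e.g. 'x *= expr') fall
// back to the compare-and-swap loop in EmitAtomicUpdate.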
static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  //   x binop= expr;    -> xrval binop expr;
  //   x++, ++x          -> xrval + 1;
  //   x--, --x          -> xrval - 1;
  //   x = x binop expr; -> xrval binop expr;
  //   x = expr Op x;    -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  llvm::AtomicOrdering AO = IsSeqCst
                                ? llvm::AtomicOrdering::SequentiallyConsistent
                                : llvm::AtomicOrdering::Monotonic;
  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  llvm::AtomicOrdering AO = IsSeqCst
                                ? llvm::AtomicOrdering::SequentiallyConsistent
                                : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    //   x binop= expr;    -> xrval binop expr;
    //   x++, ++x          -> xrval + 1;
    //   x--, --x          -> xrval - 1;
    //   x = x binop expr; -> xrval binop expr;
    //   x = expr Op x;    -> expr binop xrval;
    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

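// For illustration: in the capture form handled above, e.g.
//   #pragma omp atomic capture
//   { v = x; x += expr; }
// IsPostfixUpdate is true and v receives the old value of x; in the prefix
// form ('{ x += expr; v = x; }') v receives the new value.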
  case OMPC_device_type:
  case OMPC_match:
  case OMPC_nontemporal:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find the first clause (skipping the seq_cst clause, if it comes first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
  if (const auto *FE = dyn_cast<FullExpr>(CS))
    enterFullExpression(FE);
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const Stmt *C : Compound->body()) {
      if (const auto *FE = dyn_cast<FullExpr>(C))
        enterFullExpression(FE);
    }
  }

  auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.EmitStopPoint(CS);
    emitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
                      S.getV(), S.getExpr(), S.getUpdateExpr(),
                      S.isXLHSInRHSPart(), S.getBeginLoc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // On the device, emit this construct as inlined code.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
        });
    return;
  }

  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the (at most one) 'if' clause associated with the target
  // region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so, the target region is
  // not an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
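  // E.g. (illustrative): for a target region inside a constructor of some
  // class 'C', ParentName is derived from the complete-object constructor
  // ('C1' in the Itanium ABI); picking one fixed variant keeps the kernel
  // name stable no matter which constructor variant is being emitted.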
  if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  auto &&SizeEmitter =
      [IsOffloadEntry](CodeGenFunction &CGF,
                       const OMPLoopDirective &D) -> llvm::Value * {
    if (IsOffloadEntry) {
      OMPLoopScope(CGF, D);
      // Emit calculation of the iterations count.
      llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
      NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
                                                /*isSigned=*/false);
      return NumIterations;
    }
    return nullptr;
  };
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        SizeEmitter);
}

static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    const Expr *ThreadLimit =
        TL ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsRegion(CodeGenFunction &CGF,
                                  PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
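  // For a combined construct such as (illustrative)
  //   #pragma omp target teams distribute
  //   for (int i = 0; i < N; ++i) ...
  // the 'teams' part is outlined via emitCommonOMPTeamsDirective below, while
  // the 'distribute' loop is emitted as an inlined region inside it.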
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
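  // Note: this device-function entry point only produces the outlined kernel
  // and its offload entry; the host-side call sequence is emitted separately
  // by EmitOMPTargetTeamsDistributeSimdDirective below.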
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
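  // For illustration: with
  //   #pragma omp teams distribute parallel for
  // each chunk of the 'distribute' loop is handed to an inner 'parallel for'
  // via emitInnerParallelForWhenCombined, stepping between chunks with the
  // distribute increment S.getDistInc().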
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
                              CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  // Emit SPMD target teams distribute parallel for region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  // Emit SPMD target teams distribute parallel for simd region as a
  // standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
    // TODO: This check is necessary as we only generate `omp parallel` through
    // the OpenMPIRBuilder for now.
    if (S.getCancelRegion() == OMPD_parallel) {
      llvm::Value *IfCondition = nullptr;
      if (IfCond)
        IfCondition = EmitScalarExpr(IfCond,
                                     /*IgnoreResultAssign=*/true);
      return Builder.restoreIP(
          OMPBuilder->CreateCancel(Builder, IfCondition, S.getCancelRegion()));
    }
  }

  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}

void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (const Expr *PvtVarIt : C.private_copies()) {
    const auto *OrigVD =
        cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get an
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
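      // E.g. (illustrative): for 'use_device_ptr(Ptr)' in a member function,
      // where 'Ptr' is a field, the clause captures 'this->Ptr' through an
      // OMPCapturedExprDecl whose initializer is the member access, so unwrap
      // it to the underlying field declaration.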
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(
        OrigVD, [this, OrigVD, InitAddrIt, InitVD, PvtVD]() {
          // Initialize the temporary initialization variable with the address
          // we get from the runtime library. We have to cast the source
          // address because it is always a void *. References are
          // materialized in the privatization scope, so the initialization
          // here disregards the fact that the original variable is a
          // reference.
          QualType AddrQTy = getContext().getPointerType(
              OrigVD->getType().getNonReferenceType());
          llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
          Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
          setAddrOfLocalVar(InitVD, InitAddr);

          // Emit the private declaration; it will be initialized by the
          // declaration we just added to the local declarations map.
          EmitDecl(*PvtVD);

          // The initialization variable has served its purpose in the
          // emission of the previous declaration, so we don't need it
          // anymore.
          LocalDeclMap.erase(InitVD);

          // Return the address of the private variable.
          return GetAddrOfLocalVar(PvtVD);
        });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about the unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}

// Generate the instructions for the '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code
  // generation to deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers
      // if needed.
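      // Note: the pre-action here is the DevicePointerPrivActionTy defined
      // above; its Enter() sets PrivatizeDevicePointers to true unless the
      // runtime replaced the action to deactivate privatization.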
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inline scope: changes to the references inside the region
    // are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond,
                                                      Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond,
                                                      Device);
}

static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

/// Map a loop helper variable to the corresponding implicit parameter of the
/// outlined function, registering the mapping in the given private scope.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  const auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(VDecl,
                      [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
}

void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit the outlined function for the task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if the taskloop must be emitted without a taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
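  // For now every taskloop task is emitted as tied. The two scheduling
  // clauses handled below are mutually exclusive; e.g. (illustrative):
  //   #pragma omp taskloop grainsize(G)  // roughly G iterations per task
  //   #pragma omp taskloop num_tasks(N)  // split the loop into up to N tasks
  // The int flag of Data.Schedule tells the runtime which form was used.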
  Data.Tied = true;
  // Set the scheduling for the taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    (void)CGF.EmitOMPLinearClauseInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit inits of the helper variables.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iteration count variable.
    // If it is not a variable, Sema decided to calculate the iteration count
    // on each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit the calculation of the iteration count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    {
      OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
      emitCommonSimdLoop(
          CGF, S,
          [&S](CodeGenFunction &CGF, PrePostActionTy &) {
            if (isOpenMPSimdDirective(S.getDirectiveKind()))
              CGF.EmitOMPSimdInit(S);
          },
          [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
            CGF.EmitOMPInnerLoop(
                S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                [&S](CodeGenFunction &CGF) {
                  CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
                  CGF.EmitStopPoint(&S);
                },
                [](CodeGenFunction &) {});
          });
    }
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit the final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getBeginLoc())));
    }
    CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
                               (*LIP)->getType(), S.getBeginLoc()));
    });
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
                      &Data](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond,
                                                  Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getBeginLoc());
  }
}

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  OMPLexicalScope Scope(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
    const OMPMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
    const OMPMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}
void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
    const OMPParallelMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, llvm::None, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
    const OMPParallelMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

// Generate the instructions for the '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond,
                                                      Device);
}

void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      OMPPrivateScope LoopGlobals(CGF);
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const Expr *E : LD->counters()) {
          const auto *VD = dyn_cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(E);
            LoopGlobals.addPrivate(
                VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
          }
          if (isa<OMPCapturedExprDecl>(VD)) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
          if (!C->getNumForLoops())
            continue;
          for (unsigned I = LD->getCollapsedNumber(),
                        E = C->getLoopNumIterations().size();
               I < E; ++I) {
            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
              // Emit only those that were not explicitly referenced in
              // clauses.
              if (!CGF.LocalDeclMap.count(VD))
                CGF.EmitVarDecl(*VD);
            }
          }
        }
      }
      LoopGlobals.Privatize();
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  OMPSimdLexicalScope Scope(*this, D);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this,
      isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                  : D.getDirectiveKind(),
      CodeGen);
}