//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct
/// codegen for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for OpenMP parallel construct, that handles correct codegen
/// for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for OpenMP teams construct, that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expressions from the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    CodeGenFunction::OMPMapVars PreCondVars;
    for (const auto *E : S.counters()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      (void)PreCondVars.setVarAddr(
          CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
    }
    (void)PreCondVars.apply(CGF);
    if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
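      // The reduction reference is the implicit variable created for the
      // taskgroup's 'task_reduction' clause; emit its storage first so that
      // nested in_reduction items can locate the taskgroup's reduction data.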
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress();
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
                  : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
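      // For example, a captured 'float' is stored through a pointer of the
      // field's type into a uintptr-sized temporary and then reloaded as an
      // integer, so the value can travel through the runtime's pointer-sized
      // argument list.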
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references we need to return the address of the
  // reference instead of the address of the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    llvm::Value *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name, ".ref"));
    LValue TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit=*/true);
  }

  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly,
                           StringRef FunctionName)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. The VLA type sizes are passed to the outlined
    // function in the same way.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit the function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.S->getBeginLoc(), CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit the function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CGF.CreateMemTemp(
            CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
        CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
                              /*Volatile=*/false, CurVD->getType());
        LocalAddr = RefAddr;
      }
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
        } else if (!VarTy->isVariablyModifiedType() ||
                   !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = CGF.EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
captured pointer."); 527 const VarDecl *Var = I->getCapturedVar(); 528 QualType VarTy = Var->getType(); 529 LocalAddrs.insert( 530 {Args[Cnt], 531 {Var, FO.UIntPtrCastRequired 532 ? castValueFromUintptr(CGF, I->getLocation(), 533 FD->getType(), Args[Cnt]->getName(), 534 ArgLVal, VarTy->isReferenceType()) 535 : ArgLVal.getAddress()}}); 536 } else { 537 // If 'this' is captured, load it into CXXThisValue. 538 assert(I->capturesThis()); 539 CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); 540 LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}}); 541 } 542 ++Cnt; 543 ++I; 544 } 545 546 return F; 547 } 548 549 llvm::Function * 550 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { 551 assert( 552 CapturedStmtInfo && 553 "CapturedStmtInfo should be set when generating the captured function"); 554 const CapturedDecl *CD = S.getCapturedDecl(); 555 // Build the argument list. 556 bool NeedWrapperFunction = 557 getDebugInfo() && 558 CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo; 559 FunctionArgList Args; 560 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; 561 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes; 562 SmallString<256> Buffer; 563 llvm::raw_svector_ostream Out(Buffer); 564 Out << CapturedStmtInfo->getHelperName(); 565 if (NeedWrapperFunction) 566 Out << "_debug__"; 567 FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, 568 Out.str()); 569 llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs, 570 VLASizes, CXXThisValue, FO); 571 for (const auto &LocalAddrPair : LocalAddrs) { 572 if (LocalAddrPair.second.first) { 573 setAddrOfLocalVar(LocalAddrPair.second.first, 574 LocalAddrPair.second.second); 575 } 576 } 577 for (const auto &VLASizePair : VLASizes) 578 VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second; 579 PGO.assignRegionCounters(GlobalDecl(CD), F); 580 CapturedStmtInfo->EmitBody(*this, CD->getBody()); 581 FinishFunction(CD->getBodyRBrace()); 582 if (!NeedWrapperFunction) 583 return F; 584 585 FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, 586 /*RegisterCastedArgsOnly=*/true, 587 CapturedStmtInfo->getHelperName()); 588 CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); 589 WrapperCGF.CapturedStmtInfo = CapturedStmtInfo; 590 Args.clear(); 591 LocalAddrs.clear(); 592 VLASizes.clear(); 593 llvm::Function *WrapperF = 594 emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, 595 WrapperCGF.CXXThisValue, WrapperFO); 596 llvm::SmallVector<llvm::Value *, 4> CallArgs; 597 for (const auto *Arg : Args) { 598 llvm::Value *CallArg; 599 auto I = LocalAddrs.find(Arg); 600 if (I != LocalAddrs.end()) { 601 LValue LV = WrapperCGF.MakeAddrLValue( 602 I->second.second, 603 I->second.first ? 
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getBeginLoc(),
                                                  F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
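  // Note that CopyGen may have emitted new basic blocks, so the PHI
  // back-edges below are added from the current insert block, not BodyBB.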
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
            Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function, like omp for, omp simd, omp distribute etc.
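  // Such inlined directives have a single OMPD_unknown capture region: there
  // is no outlined function whose by-value arguments could stand in for the
  // firstprivate copies.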
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal = EmitLValue(&DRE);
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
                const Expr *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform simple memcpy.
                  LValue Dest =
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
                } else {
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(), OriginalLVal.getAddress(),
                      Type,
                      [this, VDInit, Init](Address DestElement,
                                           Address SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for single element.
                        setAddrOfLocalVar(VDInit, SrcElement);
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer*/ false);
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          Address OriginalAddr = OriginalLVal.getAddress();
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VDInit, OriginalAddr, VD]() {
                // Emit private VarDecl with copy init.
                // Remap temp VDInit variable to the address of the original
                // variable (for proper handling of captured global variables).
                setAddrOfLocalVar(VDInit, OriginalAddr);
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check if the current thread is the master thread. If it is,
          // there is no need to copy the data.
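          // On the master thread the threadprivate variable and the master
          // copy are the same object, so comparing the two addresses singles
          // out the non-master threads.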
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of the copying procedure for the non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                          /*RefersToEnclosingVariableOrCapture=*/
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable of a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const Expr *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
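      // The combiner expression is written in terms of the LHS/RHS pair:
      // mapping LHS to the shared original and RHS to the private copy makes
      // it read from and write to the right storage.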
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace

static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  llvm::Value *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    llvm::Value *NumThreads =
        CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                           /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
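  // (For 'distribute parallel for', each 'distribute' chunk becomes the
  // iteration space of the nested worksharing loop.)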
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond);
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize the threads and avoid data
      // races when propagating the master thread's values of threadprivate
      // variables to the local instances of those variables in all other
      // implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values for the current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(UE);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, no
  // need to generate the code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(UE);
    }
  }

  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
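  // The resulting CFG is:
  //   omp.inner.for.cond --(true)--> omp.inner.for.body -> omp.inner.for.inc
  //   omp.inner.for.inc  --> omp.inner.for.cond (back-edge)
  //   omp.inner.for.cond --(false)--> [cleanup] --> omp.inner.for.end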
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Init : C->inits()) {
      HasLinears = true;
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (const auto *Ref =
              dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else {
        EmitVarDecl(*VD);
      }
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}

void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (const Expr *F : C->finals()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
      auto *AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (const Expr *E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(
            PtrValue, E, /*No second loc needed*/ SourceLocation(), Alignment);
      }
    }
  }
}

void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (const Expr *E : S.counters()) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Emit var without initialization.
    AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
    EmitAutoVarCleanups(VarEmission);
    LocalDeclMap.erase(PrivateVD);
    (void)LoopScope.addPrivate(VD, [&VarEmission]() {
      return VarEmission.getAllocatedAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    } else {
      (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
        return VarEmission.getAllocatedAddress();
      });
    }
    ++I;
  }
  // Privatize extra loop counters used in loops for ordered(n) clauses.
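  // An ordered(n) clause with n greater than the collapse depth introduces
  // extra doacross loop counters; only those captured from an enclosing
  // scope get fresh private storage here.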
1513 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) { 1514 if (!C->getNumForLoops()) 1515 continue; 1516 for (unsigned I = S.getCollapsedNumber(), 1517 E = C->getLoopNumIterations().size(); 1518 I < E; ++I) { 1519 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I)); 1520 const auto *VD = cast<VarDecl>(DRE->getDecl()); 1521 // Override only those variables that can be captured to avoid re-emission 1522 // of the variables declared within the loops. 1523 if (DRE->refersToEnclosingVariableOrCapture()) { 1524 (void)LoopScope.addPrivate(VD, [this, DRE, VD]() { 1525 return CreateMemTemp(DRE->getType(), VD->getName()); 1526 }); 1527 } 1528 } 1529 } 1530 } 1531 1532 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 1533 const Expr *Cond, llvm::BasicBlock *TrueBlock, 1534 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 1535 if (!CGF.HaveInsertPoint()) 1536 return; 1537 { 1538 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 1539 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); 1540 (void)PreCondScope.Privatize(); 1541 // Get initial values of real counters. 1542 for (const Expr *I : S.inits()) { 1543 CGF.EmitIgnoredExpr(I); 1544 } 1545 } 1546 // Check that loop is executed at least one time. 1547 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); 1548 } 1549 1550 void CodeGenFunction::EmitOMPLinearClause( 1551 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { 1552 if (!HaveInsertPoint()) 1553 return; 1554 llvm::DenseSet<const VarDecl *> SIMDLCVs; 1555 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 1556 const auto *LoopDirective = cast<OMPLoopDirective>(&D); 1557 for (const Expr *C : LoopDirective->counters()) { 1558 SIMDLCVs.insert( 1559 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 1560 } 1561 } 1562 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1563 auto CurPrivate = C->privates().begin(); 1564 for (const Expr *E : C->varlists()) { 1565 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1566 const auto *PrivateVD = 1567 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); 1568 if (!SIMDLCVs.count(VD->getCanonicalDecl())) { 1569 bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() { 1570 // Emit private VarDecl with copy init. 1571 EmitVarDecl(*PrivateVD); 1572 return GetAddrOfLocalVar(PrivateVD); 1573 }); 1574 assert(IsRegistered && "linear var already registered as private"); 1575 // Silence the warning about unused variable. 1576 (void)IsRegistered; 1577 } else { 1578 EmitVarDecl(*PrivateVD); 1579 } 1580 ++CurPrivate; 1581 } 1582 } 1583 } 1584 1585 static void emitSimdlenSafelenClause(CodeGenFunction &CGF, 1586 const OMPExecutableDirective &D, 1587 bool IsMonotonic) { 1588 if (!CGF.HaveInsertPoint()) 1589 return; 1590 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { 1591 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 1592 /*ignoreResult=*/true); 1593 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1594 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1595 // In presence of finite 'safelen', it may be unsafe to mark all 1596 // the memory instructions parallel, because loop-carried 1597 // dependences of 'safelen' iterations are possible. 
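    // For example (schematically), given '#pragma omp simd simdlen(4) safelen(8)'
    // the vectorization width is set to 4 above, while the presence of
    // 'safelen' keeps the loop from being marked parallel below: iterations
    // that are 8 or more apart may still carry dependences.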
1598     if (!IsMonotonic)
1599       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1600   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1601     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1602                                  /*ignoreResult=*/true);
1603     auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1604     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1605     // In presence of finite 'safelen', it may be unsafe to mark all
1606     // the memory instructions parallel, because loop-carried
1607     // dependences of 'safelen' iterations are possible.
1608     CGF.LoopStack.setParallel(/*Enable=*/false);
1609   }
1610 }
1611 
1612 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1613                                       bool IsMonotonic) {
1614   // Walk the clauses and process any simdlen/safelen clauses.
1615   LoopStack.setParallel(!IsMonotonic);
1616   LoopStack.setVectorizeEnable();
1617   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1618 }
1619 
1620 void CodeGenFunction::EmitOMPSimdFinal(
1621     const OMPLoopDirective &D,
1622     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1623   if (!HaveInsertPoint())
1624     return;
1625   llvm::BasicBlock *DoneBB = nullptr;
1626   auto IC = D.counters().begin();
1627   auto IPC = D.private_counters().begin();
1628   for (const Expr *F : D.finals()) {
1629     const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1630     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1631     const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1632     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1633         OrigVD->hasGlobalStorage() || CED) {
1634       if (!DoneBB) {
1635         if (llvm::Value *Cond = CondGen(*this)) {
1636           // If the first post-update expression is found, emit conditional
1637           // block if it was requested.
1638           llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
1639           DoneBB = createBasicBlock(".omp.final.done");
1640           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1641           EmitBlock(ThenBB);
1642         }
1643       }
1644       Address OrigAddr = Address::invalid();
1645       if (CED) {
1646         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1647       } else {
1648         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
1649                         /*RefersToEnclosingVariableOrCapture=*/false,
1650                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1651         OrigAddr = EmitLValue(&DRE).getAddress();
1652       }
1653       OMPPrivateScope VarScope(*this);
1654       VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
1655       (void)VarScope.Privatize();
1656       EmitIgnoredExpr(F);
1657     }
1658     ++IC;
1659     ++IPC;
1660   }
1661   if (DoneBB)
1662     EmitBlock(DoneBB, /*IsFinished=*/true);
1663 }
1664 
1665 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
1666                                          const OMPLoopDirective &S,
1667                                          CodeGenFunction::JumpDest LoopExit) {
1668   CGF.EmitOMPLoopBody(S, LoopExit);
1669   CGF.EmitStopPoint(&S);
1670 }
1671 
1672 /// Emit a helper variable and return the corresponding lvalue.
1673 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 1674 const DeclRefExpr *Helper) { 1675 auto VDecl = cast<VarDecl>(Helper->getDecl()); 1676 CGF.EmitVarDecl(*VDecl); 1677 return CGF.EmitLValue(Helper); 1678 } 1679 1680 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, 1681 PrePostActionTy &Action) { 1682 Action.Enter(CGF); 1683 assert(isOpenMPSimdDirective(S.getDirectiveKind()) && 1684 "Expected simd directive"); 1685 OMPLoopScope PreInitScope(CGF, S); 1686 // if (PreCond) { 1687 // for (IV in 0..LastIteration) BODY; 1688 // <Final counter/linear vars updates>; 1689 // } 1690 // 1691 if (isOpenMPDistributeDirective(S.getDirectiveKind()) || 1692 isOpenMPWorksharingDirective(S.getDirectiveKind()) || 1693 isOpenMPTaskLoopDirective(S.getDirectiveKind())) { 1694 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable())); 1695 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable())); 1696 } 1697 1698 // Emit: if (PreCond) - begin. 1699 // If the condition constant folds and can be elided, avoid emitting the 1700 // whole loop. 1701 bool CondConstant; 1702 llvm::BasicBlock *ContBlock = nullptr; 1703 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1704 if (!CondConstant) 1705 return; 1706 } else { 1707 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then"); 1708 ContBlock = CGF.createBasicBlock("simd.if.end"); 1709 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 1710 CGF.getProfileCount(&S)); 1711 CGF.EmitBlock(ThenBlock); 1712 CGF.incrementProfileCounter(&S); 1713 } 1714 1715 // Emit the loop iteration variable. 1716 const Expr *IVExpr = S.getIterationVariable(); 1717 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 1718 CGF.EmitVarDecl(*IVDecl); 1719 CGF.EmitIgnoredExpr(S.getInit()); 1720 1721 // Emit the iterations count variable. 1722 // If it is not a variable, Sema decided to calculate iterations count on 1723 // each iteration (e.g., it is foldable into a constant). 1724 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1725 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1726 // Emit calculation of the iterations count. 1727 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 1728 } 1729 1730 CGF.EmitOMPSimdInit(S); 1731 1732 emitAlignedClause(CGF, S); 1733 (void)CGF.EmitOMPLinearClauseInit(S); 1734 { 1735 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 1736 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 1737 CGF.EmitOMPLinearClause(S, LoopScope); 1738 CGF.EmitOMPPrivateClause(S, LoopScope); 1739 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1740 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1741 (void)LoopScope.Privatize(); 1742 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 1743 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 1744 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1745 S.getInc(), 1746 [&S](CodeGenFunction &CGF) { 1747 CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); 1748 CGF.EmitStopPoint(&S); 1749 }, 1750 [](CodeGenFunction &) {}); 1751 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; }); 1752 // Emit final copy of the lastprivate variables at the end of loops. 
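    // E.g. (schematically) for '#pragma omp simd lastprivate(x)' the private
    // copy of 'x' from the logically last iteration is copied back into the
    // original variable here.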
1753 if (HasLastprivateClause) 1754 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 1755 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); 1756 emitPostUpdateForReductionClause(CGF, S, 1757 [](CodeGenFunction &) { return nullptr; }); 1758 } 1759 CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; }); 1760 // Emit: if (PreCond) - end. 1761 if (ContBlock) { 1762 CGF.EmitBranch(ContBlock); 1763 CGF.EmitBlock(ContBlock, true); 1764 } 1765 } 1766 1767 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 1768 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 1769 emitOMPSimdRegion(CGF, S, Action); 1770 }; 1771 OMPLexicalScope Scope(*this, S, OMPD_unknown); 1772 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1773 } 1774 1775 void CodeGenFunction::EmitOMPOuterLoop( 1776 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 1777 CodeGenFunction::OMPPrivateScope &LoopScope, 1778 const CodeGenFunction::OMPLoopArguments &LoopArgs, 1779 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 1780 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { 1781 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 1782 1783 const Expr *IVExpr = S.getIterationVariable(); 1784 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1785 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1786 1787 JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 1788 1789 // Start the loop with a block that tests the condition. 1790 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond"); 1791 EmitBlock(CondBlock); 1792 const SourceRange R = S.getSourceRange(); 1793 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1794 SourceLocToDebugLoc(R.getEnd())); 1795 1796 llvm::Value *BoolCondVal = nullptr; 1797 if (!DynamicOrOrdered) { 1798 // UB = min(UB, GlobalUB) or 1799 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 1800 // 'distribute parallel for') 1801 EmitIgnoredExpr(LoopArgs.EUB); 1802 // IV = LB 1803 EmitIgnoredExpr(LoopArgs.Init); 1804 // IV < UB 1805 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); 1806 } else { 1807 BoolCondVal = 1808 RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL, 1809 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); 1810 } 1811 1812 // If there are any cleanups between here and the loop-exit scope, 1813 // create a block to stage a loop exit along. 1814 llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); 1815 if (LoopScope.requiresCleanups()) 1816 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 1817 1818 llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body"); 1819 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 1820 if (ExitBlock != LoopExit.getBlock()) { 1821 EmitBlock(ExitBlock); 1822 EmitBranchThroughCleanup(LoopExit); 1823 } 1824 EmitBlock(LoopBody); 1825 1826 // Emit "IV = LB" (in case of static schedule, we have already calculated new 1827 // LB for loop condition and emitted it above). 1828 if (DynamicOrOrdered) 1829 EmitIgnoredExpr(LoopArgs.Init); 1830 1831 // Create a block for the increment. 1832 JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 1833 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1834 1835 // Generate !llvm.loop.parallel metadata for loads and stores for loops 1836 // with dynamic/guided scheduling and without ordered clause. 
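  // (Roughly, setParallel() causes CGLoopInfo to attach
  // 'llvm.mem.parallel_loop_access' metadata to the memory accesses in the
  // loop body, which lets the vectorizer assume there are no loop-carried
  // memory dependences.)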
1837 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 1838 LoopStack.setParallel(!IsMonotonic); 1839 else 1840 EmitOMPSimdInit(S, IsMonotonic); 1841 1842 SourceLocation Loc = S.getBeginLoc(); 1843 1844 // when 'distribute' is not combined with a 'for': 1845 // while (idx <= UB) { BODY; ++idx; } 1846 // when 'distribute' is combined with a 'for' 1847 // (e.g. 'distribute parallel for') 1848 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 1849 EmitOMPInnerLoop( 1850 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, 1851 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 1852 CodeGenLoop(CGF, S, LoopExit); 1853 }, 1854 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { 1855 CodeGenOrdered(CGF, Loc, IVSize, IVSigned); 1856 }); 1857 1858 EmitBlock(Continue.getBlock()); 1859 BreakContinueStack.pop_back(); 1860 if (!DynamicOrOrdered) { 1861 // Emit "LB = LB + Stride", "UB = UB + Stride". 1862 EmitIgnoredExpr(LoopArgs.NextLB); 1863 EmitIgnoredExpr(LoopArgs.NextUB); 1864 } 1865 1866 EmitBranch(CondBlock); 1867 LoopStack.pop(); 1868 // Emit the fall-through block. 1869 EmitBlock(LoopExit.getBlock()); 1870 1871 // Tell the runtime we are done. 1872 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { 1873 if (!DynamicOrOrdered) 1874 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 1875 S.getDirectiveKind()); 1876 }; 1877 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 1878 } 1879 1880 void CodeGenFunction::EmitOMPForOuterLoop( 1881 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 1882 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1883 const OMPLoopArguments &LoopArgs, 1884 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 1885 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 1886 1887 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 1888 const bool DynamicOrOrdered = 1889 Ordered || RT.isDynamic(ScheduleKind.Schedule); 1890 1891 assert((Ordered || 1892 !RT.isStaticNonchunked(ScheduleKind.Schedule, 1893 LoopArgs.Chunk != nullptr)) && 1894 "static non-chunked schedule does not need outer loop"); 1895 1896 // Emit outer loop. 1897 // 1898 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1899 // When schedule(dynamic,chunk_size) is specified, the iterations are 1900 // distributed to threads in the team in chunks as the threads request them. 1901 // Each thread executes a chunk of iterations, then requests another chunk, 1902 // until no chunks remain to be distributed. Each chunk contains chunk_size 1903 // iterations, except for the last chunk to be distributed, which may have 1904 // fewer iterations. When no chunk_size is specified, it defaults to 1. 1905 // 1906 // When schedule(guided,chunk_size) is specified, the iterations are assigned 1907 // to threads in the team in chunks as the executing threads request them. 1908 // Each thread executes a chunk of iterations, then requests another chunk, 1909 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 1910 // each chunk is proportional to the number of unassigned iterations divided 1911 // by the number of threads in the team, decreasing to 1. For a chunk_size 1912 // with value k (greater than 1), the size of each chunk is determined in the 1913 // same way, with the restriction that the chunks do not contain fewer than k 1914 // iterations (except for the last chunk to be assigned, which may have fewer 1915 // than k iterations). 
1916   //
1917   // When schedule(auto) is specified, the decision regarding scheduling is
1918   // delegated to the compiler and/or runtime system. The programmer gives the
1919   // implementation the freedom to choose any possible mapping of iterations to
1920   // threads in the team.
1921   //
1922   // When schedule(runtime) is specified, the decision regarding scheduling is
1923   // deferred until run time, and the schedule and chunk size are taken from the
1924   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1925   // implementation defined.
1926   //
1927   // while(__kmpc_dispatch_next(&LB, &UB)) {
1928   //   idx = LB;
1929   //   while (idx <= UB) { BODY; ++idx;
1930   //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1931   //   } // inner loop
1932   // }
1933   //
1934   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1935   // When schedule(static, chunk_size) is specified, iterations are divided into
1936   // chunks of size chunk_size, and the chunks are assigned to the threads in
1937   // the team in a round-robin fashion in the order of the thread number.
1938   //
1939   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1940   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1941   //   LB = LB + ST;
1942   //   UB = UB + ST;
1943   // }
1944   //
1945 
1946   const Expr *IVExpr = S.getIterationVariable();
1947   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1948   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1949 
1950   if (DynamicOrOrdered) {
1951     const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
1952         CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1953     llvm::Value *LBVal = DispatchBounds.first;
1954     llvm::Value *UBVal = DispatchBounds.second;
1955     CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
1956                                                               LoopArgs.Chunk};
1957     RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
1958                            IVSigned, Ordered, DispatchRTInputValues);
1959   } else {
1960     CGOpenMPRuntime::StaticRTInput StaticInit(
1961         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1962         LoopArgs.ST, LoopArgs.Chunk);
1963     RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
1964                          ScheduleKind, StaticInit);
1965   }
1966 
1967   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1968                                     const unsigned IVSize,
1969                                     const bool IVSigned) {
1970     if (Ordered) {
1971       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1972                                                             IVSigned);
1973     }
1974   };
1975 
1976   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1977                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1978   OuterLoopArgs.IncExpr = S.getInc();
1979   OuterLoopArgs.Init = S.getInit();
1980   OuterLoopArgs.Cond = S.getCond();
1981   OuterLoopArgs.NextLB = S.getNextLowerBound();
1982   OuterLoopArgs.NextUB = S.getNextUpperBound();
1983   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1984                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1985 }
1986 
1987 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
1988                              const unsigned IVSize, const bool IVSigned) {}
1989 
1990 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1991     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1992     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1993     const CodeGenLoopTy &CodeGenLoopContent) {
1994 
1995   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
1996 
1997   // Emit outer loop.
1998   // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
1999   // dynamic.
2000   //
2001 
2002   const Expr *IVExpr = S.getIterationVariable();
2003   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2004   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2005 
2006   CGOpenMPRuntime::StaticRTInput StaticInit(
2007       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
2008       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
2009   RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
2010 
2011   // For combined 'distribute' and 'for' the increment expression of distribute
2012   // is stored in DistInc. For 'distribute' alone, it is in Inc.
2013   Expr *IncExpr;
2014   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
2015     IncExpr = S.getDistInc();
2016   else
2017     IncExpr = S.getInc();
2018 
2019   // This routine is shared by 'omp distribute parallel for' and
2020   // 'omp distribute': select the right EUB expression depending on the
2021   // directive.
2022   OMPLoopArguments OuterLoopArgs;
2023   OuterLoopArgs.LB = LoopArgs.LB;
2024   OuterLoopArgs.UB = LoopArgs.UB;
2025   OuterLoopArgs.ST = LoopArgs.ST;
2026   OuterLoopArgs.IL = LoopArgs.IL;
2027   OuterLoopArgs.Chunk = LoopArgs.Chunk;
2028   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2029                           ? S.getCombinedEnsureUpperBound()
2030                           : S.getEnsureUpperBound();
2031   OuterLoopArgs.IncExpr = IncExpr;
2032   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2033                            ? S.getCombinedInit()
2034                            : S.getInit();
2035   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2036                            ? S.getCombinedCond()
2037                            : S.getCond();
2038   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2039                              ? S.getCombinedNextLowerBound()
2040                              : S.getNextLowerBound();
2041   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2042                              ? S.getCombinedNextUpperBound()
2043                              : S.getNextUpperBound();
2044 
2045   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2046                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
2047                    emitEmptyOrdered);
2048 }
2049 
2050 static std::pair<LValue, LValue>
2051 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
2052                                      const OMPExecutableDirective &S) {
2053   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2054   LValue LB =
2055       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2056   LValue UB =
2057       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2058 
2059   // When composing 'distribute' with 'for' (e.g. as in 'distribute
2060   // parallel for') we need to use the 'distribute'
2061   // chunk lower and upper bounds rather than the whole loop iteration
2062   // space. These are parameters to the outlined function for 'parallel'
2063   // and we copy the bounds of the previous schedule into
2064   // the current ones.
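  // Schematically, for '#pragma omp distribute parallel for' each inner
  // 'parallel for' works on the chunk [PrevLB, PrevUB] that the enclosing
  // 'distribute' loop assigned to the team, so LB and UB are seeded from
  // those values below rather than from 0 and the global upper bound.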
2065   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
2066   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
2067   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
2068       PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
2069   PrevLBVal = CGF.EmitScalarConversion(
2070       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
2071       LS.getIterationVariable()->getType(),
2072       LS.getPrevLowerBoundVariable()->getExprLoc());
2073   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
2074       PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
2075   PrevUBVal = CGF.EmitScalarConversion(
2076       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
2077       LS.getIterationVariable()->getType(),
2078       LS.getPrevUpperBoundVariable()->getExprLoc());
2079 
2080   CGF.EmitStoreOfScalar(PrevLBVal, LB);
2081   CGF.EmitStoreOfScalar(PrevUBVal, UB);
2082 
2083   return {LB, UB};
2084 }
2085 
2086 /// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided), we need
2087 /// to use the LB and UB expressions generated by the worksharing loop
2088 /// support; in non-combined situations we would just emit 0 and the
2089 /// LastIteration expression.
2090 /// This function is necessary due to the difference between the LB and UB
2091 /// types required by the RT emission routines for 'for_static_init' and
2092 /// 'for_dispatch_init'.
2093 static std::pair<llvm::Value *, llvm::Value *>
2094 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2095                                         const OMPExecutableDirective &S,
2096                                         Address LB, Address UB) {
2097   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2098   const Expr *IVExpr = LS.getIterationVariable();
2099   // When implementing a dynamic schedule for a 'for' combined with a
2100   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2101   // is not normalized, as each team only executes its own assigned
2102   // distribute chunk.
2103   QualType IteratorTy = IVExpr->getType();
2104   llvm::Value *LBVal =
2105       CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2106   llvm::Value *UBVal =
2107       CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2108   return {LBVal, UBVal};
2109 }
2110 
2111 static void emitDistributeParallelForDistributeInnerBoundParams(
2112     CodeGenFunction &CGF, const OMPExecutableDirective &S,
2113     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
2114   const auto &Dir = cast<OMPLoopDirective>(S);
2115   LValue LB =
2116       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
2117   llvm::Value *LBCast = CGF.Builder.CreateIntCast(
2118       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2119   CapturedVars.push_back(LBCast);
2120   LValue UB =
2121       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2122 
2123   llvm::Value *UBCast = CGF.Builder.CreateIntCast(
2124       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2125   CapturedVars.push_back(UBCast);
2126 }
2127 
2128 static void
2129 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2130                                  const OMPLoopDirective &S,
2131                                  CodeGenFunction::JumpDest LoopExit) {
2132   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2133                                          PrePostActionTy &Action) {
2134     Action.Enter(CGF);
2135     bool HasCancel = false;
2136     if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2137       if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
2138         HasCancel = D->hasCancel();
2139       else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
2140         HasCancel =
D->hasCancel();
2141       else if (const auto *D =
2142                    dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
2143         HasCancel = D->hasCancel();
2144     }
2145     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
2146                                                      HasCancel);
2147     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2148                                emitDistributeParallelForInnerBounds,
2149                                emitDistributeParallelForDispatchBounds);
2150   };
2151 
2152   emitCommonOMPParallelDirective(
2153       CGF, S,
2154       isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
2155       CGInlinedWorksharingLoop,
2156       emitDistributeParallelForDistributeInnerBoundParams);
2157 }
2158 
2159 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2160     const OMPDistributeParallelForDirective &S) {
2161   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2162     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2163                               S.getDistInc());
2164   };
2165   OMPLexicalScope Scope(*this, S, OMPD_parallel);
2166   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2167 }
2168 
2169 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2170     const OMPDistributeParallelForSimdDirective &S) {
2171   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2172     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2173                               S.getDistInc());
2174   };
2175   OMPLexicalScope Scope(*this, S, OMPD_parallel);
2176   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2177 }
2178 
2179 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2180     const OMPDistributeSimdDirective &S) {
2181   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2182     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
2183   };
2184   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2185   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2186 }
2187 
2188 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
2189     CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
2190   // Emit SPMD target simd region as a standalone region.
2191   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2192     emitOMPSimdRegion(CGF, S, Action);
2193   };
2194   llvm::Function *Fn;
2195   llvm::Constant *Addr;
2196   // Emit target region as a standalone region.
2197   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
2198       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
2199   assert(Fn && Addr && "Target device function emission failed.");
2200 }
2201 
2202 void CodeGenFunction::EmitOMPTargetSimdDirective(
2203     const OMPTargetSimdDirective &S) {
2204   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2205     emitOMPSimdRegion(CGF, S, Action);
2206   };
2207   emitCommonOMPTargetDirective(*this, S, CodeGen);
2208 }
2209 
2210 namespace {
2211 struct ScheduleKindModifiersTy {
2212   OpenMPScheduleClauseKind Kind;
2213   OpenMPScheduleClauseModifier M1;
2214   OpenMPScheduleClauseModifier M2;
2215   ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2216                           OpenMPScheduleClauseModifier M1,
2217                           OpenMPScheduleClauseModifier M2)
2218       : Kind(Kind), M1(M1), M2(M2) {}
2219 };
2220 } // namespace
2221 
2222 bool CodeGenFunction::EmitOMPWorksharingLoop(
2223     const OMPLoopDirective &S, Expr *EUB,
2224     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2225     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2226   // Emit the loop iteration variable.
2227 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2228 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2229 EmitVarDecl(*IVDecl); 2230 2231 // Emit the iterations count variable. 2232 // If it is not a variable, Sema decided to calculate iterations count on each 2233 // iteration (e.g., it is foldable into a constant). 2234 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2235 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2236 // Emit calculation of the iterations count. 2237 EmitIgnoredExpr(S.getCalcLastIteration()); 2238 } 2239 2240 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2241 2242 bool HasLastprivateClause; 2243 // Check pre-condition. 2244 { 2245 OMPLoopScope PreInitScope(*this, S); 2246 // Skip the entire loop if we don't meet the precondition. 2247 // If the condition constant folds and can be elided, avoid emitting the 2248 // whole loop. 2249 bool CondConstant; 2250 llvm::BasicBlock *ContBlock = nullptr; 2251 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2252 if (!CondConstant) 2253 return false; 2254 } else { 2255 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then"); 2256 ContBlock = createBasicBlock("omp.precond.end"); 2257 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2258 getProfileCount(&S)); 2259 EmitBlock(ThenBlock); 2260 incrementProfileCounter(&S); 2261 } 2262 2263 RunCleanupsScope DoacrossCleanupScope(*this); 2264 bool Ordered = false; 2265 if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 2266 if (OrderedClause->getNumForLoops()) 2267 RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations()); 2268 else 2269 Ordered = true; 2270 } 2271 2272 llvm::DenseSet<const Expr *> EmittedFinals; 2273 emitAlignedClause(*this, S); 2274 bool HasLinears = EmitOMPLinearClauseInit(S); 2275 // Emit helper vars inits. 2276 2277 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); 2278 LValue LB = Bounds.first; 2279 LValue UB = Bounds.second; 2280 LValue ST = 2281 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2282 LValue IL = 2283 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2284 2285 // Emit 'then' code. 2286 { 2287 OMPPrivateScope LoopScope(*this); 2288 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) { 2289 // Emit implicit barrier to synchronize threads and avoid data races on 2290 // initialization of firstprivate variables and post-update of 2291 // lastprivate variables. 2292 CGM.getOpenMPRuntime().emitBarrierCall( 2293 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 2294 /*ForceSimpleCall=*/true); 2295 } 2296 EmitOMPPrivateClause(S, LoopScope); 2297 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2298 EmitOMPReductionClauseInit(S, LoopScope); 2299 EmitOMPPrivateLoopCounters(S, LoopScope); 2300 EmitOMPLinearClause(S, LoopScope); 2301 (void)LoopScope.Privatize(); 2302 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 2303 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); 2304 2305 // Detect the loop schedule kind and chunk. 2306 const Expr *ChunkExpr = nullptr; 2307 OpenMPScheduleTy ScheduleKind; 2308 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { 2309 ScheduleKind.Schedule = C->getScheduleKind(); 2310 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2311 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2312 ChunkExpr = C->getChunkSize(); 2313 } else { 2314 // Default behaviour for schedule clause. 
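        // (For most targets this leaves the schedule as static without a
        // chunk; device runtimes, e.g. for NVPTX, may choose a different
        // default here.)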
2315 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk( 2316 *this, S, ScheduleKind.Schedule, ChunkExpr); 2317 } 2318 bool HasChunkSizeOne = false; 2319 llvm::Value *Chunk = nullptr; 2320 if (ChunkExpr) { 2321 Chunk = EmitScalarExpr(ChunkExpr); 2322 Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(), 2323 S.getIterationVariable()->getType(), 2324 S.getBeginLoc()); 2325 Expr::EvalResult Result; 2326 if (ChunkExpr->EvaluateAsInt(Result, getContext())) { 2327 llvm::APSInt EvaluatedChunk = Result.Val.getInt(); 2328 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1); 2329 } 2330 } 2331 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2332 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2333 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2334 // If the static schedule kind is specified or if the ordered clause is 2335 // specified, and if no monotonic modifier is specified, the effect will 2336 // be as if the monotonic modifier was specified. 2337 bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, 2338 /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && 2339 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); 2340 if ((RT.isStaticNonchunked(ScheduleKind.Schedule, 2341 /* Chunked */ Chunk != nullptr) || 2342 StaticChunkedOne) && 2343 !Ordered) { 2344 if (isOpenMPSimdDirective(S.getDirectiveKind())) 2345 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2346 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2347 // When no chunk_size is specified, the iteration space is divided into 2348 // chunks that are approximately equal in size, and at most one chunk is 2349 // distributed to each thread. Note that the size of the chunks is 2350 // unspecified in this case. 2351 CGOpenMPRuntime::StaticRTInput StaticInit( 2352 IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(), 2353 UB.getAddress(), ST.getAddress(), 2354 StaticChunkedOne ? Chunk : nullptr); 2355 RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(), 2356 ScheduleKind, StaticInit); 2357 JumpDest LoopExit = 2358 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2359 // UB = min(UB, GlobalUB); 2360 if (!StaticChunkedOne) 2361 EmitIgnoredExpr(S.getEnsureUpperBound()); 2362 // IV = LB; 2363 EmitIgnoredExpr(S.getInit()); 2364 // For unchunked static schedule generate: 2365 // 2366 // while (idx <= UB) { 2367 // BODY; 2368 // ++idx; 2369 // } 2370 // 2371 // For static schedule with chunk one: 2372 // 2373 // while (IV <= PrevUB) { 2374 // BODY; 2375 // IV += ST; 2376 // } 2377 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), 2378 StaticChunkedOne ? S.getCombinedParForInDistCond() : S.getCond(), 2379 StaticChunkedOne ? S.getDistInc() : S.getInc(), 2380 [&S, LoopExit](CodeGenFunction &CGF) { 2381 CGF.EmitOMPLoopBody(S, LoopExit); 2382 CGF.EmitStopPoint(&S); 2383 }, 2384 [](CodeGenFunction &) {}); 2385 EmitBlock(LoopExit.getBlock()); 2386 // Tell the runtime we are done. 
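        // (Schematically, this lowers to a '__kmpc_for_static_fini' call that
        // pairs with the '__kmpc_for_static_init_*' call emitted for the
        // static schedule above.)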
2387 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2388 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 2389 S.getDirectiveKind()); 2390 }; 2391 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2392 } else { 2393 const bool IsMonotonic = 2394 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || 2395 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || 2396 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 2397 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 2398 // Emit the outer loop, which requests its work chunk [LB..UB] from 2399 // runtime and runs the inner loop to process it. 2400 const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), 2401 ST.getAddress(), IL.getAddress(), 2402 Chunk, EUB); 2403 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 2404 LoopArguments, CGDispatchBounds); 2405 } 2406 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2407 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { 2408 return CGF.Builder.CreateIsNotNull( 2409 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 2410 }); 2411 } 2412 EmitOMPReductionClauseFinal( 2413 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 2414 ? /*Parallel and Simd*/ OMPD_parallel_for_simd 2415 : /*Parallel only*/ OMPD_parallel); 2416 // Emit post-update of the reduction variables if IsLastIter != 0. 2417 emitPostUpdateForReductionClause( 2418 *this, S, [IL, &S](CodeGenFunction &CGF) { 2419 return CGF.Builder.CreateIsNotNull( 2420 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 2421 }); 2422 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2423 if (HasLastprivateClause) 2424 EmitOMPLastprivateClauseFinal( 2425 S, isOpenMPSimdDirective(S.getDirectiveKind()), 2426 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); 2427 } 2428 EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) { 2429 return CGF.Builder.CreateIsNotNull( 2430 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 2431 }); 2432 DoacrossCleanupScope.ForceCleanup(); 2433 // We're now done with the loop, so jump to the continuation block. 2434 if (ContBlock) { 2435 EmitBranch(ContBlock); 2436 EmitBlock(ContBlock, /*IsFinished=*/true); 2437 } 2438 } 2439 return HasLastprivateClause; 2440 } 2441 2442 /// The following two functions generate expressions for the loop lower 2443 /// and upper bounds in case of static and dynamic (dispatch) schedule 2444 /// of the associated 'for' or 'distribute' loop. 2445 static std::pair<LValue, LValue> 2446 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 2447 const auto &LS = cast<OMPLoopDirective>(S); 2448 LValue LB = 2449 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2450 LValue UB = 2451 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2452 return {LB, UB}; 2453 } 2454 2455 /// When dealing with dispatch schedules (e.g. 
dynamic, guided) we do not 2456 /// consider the lower and upper bound expressions generated by the 2457 /// worksharing loop support, but we use 0 and the iteration space size as 2458 /// constants 2459 static std::pair<llvm::Value *, llvm::Value *> 2460 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, 2461 Address LB, Address UB) { 2462 const auto &LS = cast<OMPLoopDirective>(S); 2463 const Expr *IVExpr = LS.getIterationVariable(); 2464 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); 2465 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); 2466 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); 2467 return {LBVal, UBVal}; 2468 } 2469 2470 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 2471 bool HasLastprivates = false; 2472 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2473 PrePostActionTy &) { 2474 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 2475 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2476 emitForLoopBounds, 2477 emitDispatchForLoopBounds); 2478 }; 2479 { 2480 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2481 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 2482 S.hasCancel()); 2483 } 2484 2485 // Emit an implicit barrier at the end. 2486 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) 2487 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); 2488 } 2489 2490 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 2491 bool HasLastprivates = false; 2492 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2493 PrePostActionTy &) { 2494 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2495 emitForLoopBounds, 2496 emitDispatchForLoopBounds); 2497 }; 2498 { 2499 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2500 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2501 } 2502 2503 // Emit an implicit barrier at the end. 2504 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) 2505 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); 2506 } 2507 2508 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 2509 const Twine &Name, 2510 llvm::Value *Init = nullptr) { 2511 LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 2512 if (Init) 2513 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); 2514 return LVal; 2515 } 2516 2517 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 2518 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); 2519 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); 2520 bool HasLastprivates = false; 2521 auto &&CodeGen = [&S, CapturedStmt, CS, 2522 &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { 2523 ASTContext &C = CGF.getContext(); 2524 QualType KmpInt32Ty = 2525 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2526 // Emit helper vars inits. 2527 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 2528 CGF.Builder.getInt32(0)); 2529 llvm::ConstantInt *GlobalUBVal = CS != nullptr 2530 ? 
CGF.Builder.getInt32(CS->size() - 1) 2531 : CGF.Builder.getInt32(0); 2532 LValue UB = 2533 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 2534 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 2535 CGF.Builder.getInt32(1)); 2536 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 2537 CGF.Builder.getInt32(0)); 2538 // Loop counter. 2539 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 2540 OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 2541 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 2542 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 2543 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 2544 // Generate condition for loop. 2545 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 2546 OK_Ordinary, S.getBeginLoc(), FPOptions()); 2547 // Increment for loop counter. 2548 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 2549 S.getBeginLoc(), true); 2550 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { 2551 // Iterate through all sections and emit a switch construct: 2552 // switch (IV) { 2553 // case 0: 2554 // <SectionStmt[0]>; 2555 // break; 2556 // ... 2557 // case <NumSection> - 1: 2558 // <SectionStmt[<NumSection> - 1]>; 2559 // break; 2560 // } 2561 // .omp.sections.exit: 2562 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 2563 llvm::SwitchInst *SwitchStmt = 2564 CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()), 2565 ExitBB, CS == nullptr ? 1 : CS->size()); 2566 if (CS) { 2567 unsigned CaseNumber = 0; 2568 for (const Stmt *SubStmt : CS->children()) { 2569 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2570 CGF.EmitBlock(CaseBB); 2571 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 2572 CGF.EmitStmt(SubStmt); 2573 CGF.EmitBranch(ExitBB); 2574 ++CaseNumber; 2575 } 2576 } else { 2577 llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2578 CGF.EmitBlock(CaseBB); 2579 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 2580 CGF.EmitStmt(CapturedStmt); 2581 CGF.EmitBranch(ExitBB); 2582 } 2583 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 2584 }; 2585 2586 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2587 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 2588 // Emit implicit barrier to synchronize threads and avoid data races on 2589 // initialization of firstprivate variables and post-update of lastprivate 2590 // variables. 2591 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 2592 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 2593 /*ForceSimpleCall=*/true); 2594 } 2595 CGF.EmitOMPPrivateClause(S, LoopScope); 2596 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2597 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2598 (void)LoopScope.Privatize(); 2599 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 2600 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 2601 2602 // Emit static non-chunked loop. 
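    // Sections are lowered as a static workshared loop over the section
    // index; e.g. (schematically) a 'sections' region with two 'section'
    // blocks becomes a loop over IV in [0, 1] whose body is the switch built
    // in BodyGen above.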
2603 OpenMPScheduleTy ScheduleKind; 2604 ScheduleKind.Schedule = OMPC_SCHEDULE_static; 2605 CGOpenMPRuntime::StaticRTInput StaticInit( 2606 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), 2607 LB.getAddress(), UB.getAddress(), ST.getAddress()); 2608 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 2609 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit); 2610 // UB = min(UB, GlobalUB); 2611 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc()); 2612 llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect( 2613 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 2614 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 2615 // IV = LB; 2616 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV); 2617 // while (idx <= UB) { BODY; ++idx; } 2618 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, 2619 [](CodeGenFunction &) {}); 2620 // Tell the runtime we are done. 2621 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2622 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 2623 S.getDirectiveKind()); 2624 }; 2625 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); 2626 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 2627 // Emit post-update of the reduction variables if IsLastIter != 0. 2628 emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) { 2629 return CGF.Builder.CreateIsNotNull( 2630 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 2631 }); 2632 2633 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2634 if (HasLastprivates) 2635 CGF.EmitOMPLastprivateClauseFinal( 2636 S, /*NoFinals=*/false, 2637 CGF.Builder.CreateIsNotNull( 2638 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()))); 2639 }; 2640 2641 bool HasCancel = false; 2642 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 2643 HasCancel = OSD->hasCancel(); 2644 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 2645 HasCancel = OPSD->hasCancel(); 2646 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); 2647 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 2648 HasCancel); 2649 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 2650 // clause. Otherwise the barrier will be generated by the codegen for the 2651 // directive. 2652 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 2653 // Emit implicit barrier to synchronize threads and avoid data races on 2654 // initialization of firstprivate variables. 2655 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), 2656 OMPD_unknown); 2657 } 2658 } 2659 2660 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 2661 { 2662 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2663 EmitSections(S); 2664 } 2665 // Emit an implicit barrier at the end. 
2666 if (!S.getSingleClause<OMPNowaitClause>()) { 2667 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), 2668 OMPD_sections); 2669 } 2670 } 2671 2672 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 2673 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2674 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 2675 }; 2676 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2677 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 2678 S.hasCancel()); 2679 } 2680 2681 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 2682 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 2683 llvm::SmallVector<const Expr *, 8> DestExprs; 2684 llvm::SmallVector<const Expr *, 8> SrcExprs; 2685 llvm::SmallVector<const Expr *, 8> AssignmentOps; 2686 // Check if there are any 'copyprivate' clauses associated with this 2687 // 'single' construct. 2688 // Build a list of copyprivate variables along with helper expressions 2689 // (<source>, <destination>, <destination>=<source> expressions) 2690 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 2691 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 2692 DestExprs.append(C->destination_exprs().begin(), 2693 C->destination_exprs().end()); 2694 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 2695 AssignmentOps.append(C->assignment_ops().begin(), 2696 C->assignment_ops().end()); 2697 } 2698 // Emit code for 'single' region along with 'copyprivate' clauses 2699 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2700 Action.Enter(CGF); 2701 OMPPrivateScope SingleScope(CGF); 2702 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 2703 CGF.EmitOMPPrivateClause(S, SingleScope); 2704 (void)SingleScope.Privatize(); 2705 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 2706 }; 2707 { 2708 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2709 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(), 2710 CopyprivateVars, DestExprs, 2711 SrcExprs, AssignmentOps); 2712 } 2713 // Emit an implicit barrier at the end (to avoid data race on firstprivate 2714 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 2715 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 2716 CGM.getOpenMPRuntime().emitBarrierCall( 2717 *this, S.getBeginLoc(), 2718 S.getSingleClause<OMPNowaitClause>() ? 
OMPD_unknown : OMPD_single); 2719 } 2720 } 2721 2722 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 2723 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2724 Action.Enter(CGF); 2725 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 2726 }; 2727 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2728 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); 2729 } 2730 2731 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 2732 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2733 Action.Enter(CGF); 2734 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 2735 }; 2736 const Expr *Hint = nullptr; 2737 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) 2738 Hint = HintClause->getHint(); 2739 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2740 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 2741 S.getDirectiveName().getAsString(), 2742 CodeGen, S.getBeginLoc(), Hint); 2743 } 2744 2745 void CodeGenFunction::EmitOMPParallelForDirective( 2746 const OMPParallelForDirective &S) { 2747 // Emit directive as a combined directive that consists of two implicit 2748 // directives: 'parallel' with 'for' directive. 2749 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2750 Action.Enter(CGF); 2751 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); 2752 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2753 emitDispatchForLoopBounds); 2754 }; 2755 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, 2756 emitEmptyBoundParameters); 2757 } 2758 2759 void CodeGenFunction::EmitOMPParallelForSimdDirective( 2760 const OMPParallelForSimdDirective &S) { 2761 // Emit directive as a combined directive that consists of two implicit 2762 // directives: 'parallel' with 'for' directive. 2763 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2764 Action.Enter(CGF); 2765 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2766 emitDispatchForLoopBounds); 2767 }; 2768 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, 2769 emitEmptyBoundParameters); 2770 } 2771 2772 void CodeGenFunction::EmitOMPParallelSectionsDirective( 2773 const OMPParallelSectionsDirective &S) { 2774 // Emit directive as a combined directive that consists of two implicit 2775 // directives: 'parallel' with 'sections' directive. 2776 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2777 Action.Enter(CGF); 2778 CGF.EmitSections(S); 2779 }; 2780 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, 2781 emitEmptyBoundParameters); 2782 } 2783 2784 void CodeGenFunction::EmitOMPTaskBasedDirective( 2785 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, 2786 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, 2787 OMPTaskDataTy &Data) { 2788 // Emit outlined function for task construct. 2789 const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion); 2790 auto I = CS->getCapturedDecl()->param_begin(); 2791 auto PartId = std::next(I); 2792 auto TaskT = std::next(I, 4); 2793 // Check if the task is final 2794 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 2795 // If the condition constant folds and can be elided, try to avoid emitting 2796 // the condition and the dead arm of the if/else. 
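    // E.g. (schematically) for '#pragma omp task final(N > 100)' with a
    // constant-foldable condition the final flag is set directly; otherwise
    // the condition is evaluated at run time below.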
2797 const Expr *Cond = Clause->getCondition(); 2798 bool CondConstant; 2799 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 2800 Data.Final.setInt(CondConstant); 2801 else 2802 Data.Final.setPointer(EvaluateExprAsBool(Cond)); 2803 } else { 2804 // By default the task is not final. 2805 Data.Final.setInt(/*IntVal=*/false); 2806 } 2807 // Check if the task has 'priority' clause. 2808 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 2809 const Expr *Prio = Clause->getPriority(); 2810 Data.Priority.setInt(/*IntVal=*/true); 2811 Data.Priority.setPointer(EmitScalarConversion( 2812 EmitScalarExpr(Prio), Prio->getType(), 2813 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 2814 Prio->getExprLoc())); 2815 } 2816 // The first function argument for tasks is a thread id, the second one is a 2817 // part id (0 for tied tasks, >=0 for untied task). 2818 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 2819 // Get list of private variables. 2820 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 2821 auto IRef = C->varlist_begin(); 2822 for (const Expr *IInit : C->private_copies()) { 2823 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2824 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2825 Data.PrivateVars.push_back(*IRef); 2826 Data.PrivateCopies.push_back(IInit); 2827 } 2828 ++IRef; 2829 } 2830 } 2831 EmittedAsPrivate.clear(); 2832 // Get list of firstprivate variables. 2833 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 2834 auto IRef = C->varlist_begin(); 2835 auto IElemInitRef = C->inits().begin(); 2836 for (const Expr *IInit : C->private_copies()) { 2837 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2838 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2839 Data.FirstprivateVars.push_back(*IRef); 2840 Data.FirstprivateCopies.push_back(IInit); 2841 Data.FirstprivateInits.push_back(*IElemInitRef); 2842 } 2843 ++IRef; 2844 ++IElemInitRef; 2845 } 2846 } 2847 // Get list of lastprivate variables (for taskloops). 
2848 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 2849 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 2850 auto IRef = C->varlist_begin(); 2851 auto ID = C->destination_exprs().begin(); 2852 for (const Expr *IInit : C->private_copies()) { 2853 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2854 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2855 Data.LastprivateVars.push_back(*IRef); 2856 Data.LastprivateCopies.push_back(IInit); 2857 } 2858 LastprivateDstsOrigs.insert( 2859 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 2860 cast<DeclRefExpr>(*IRef)}); 2861 ++IRef; 2862 ++ID; 2863 } 2864 } 2865 SmallVector<const Expr *, 4> LHSs; 2866 SmallVector<const Expr *, 4> RHSs; 2867 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 2868 auto IPriv = C->privates().begin(); 2869 auto IRed = C->reduction_ops().begin(); 2870 auto ILHS = C->lhs_exprs().begin(); 2871 auto IRHS = C->rhs_exprs().begin(); 2872 for (const Expr *Ref : C->varlists()) { 2873 Data.ReductionVars.emplace_back(Ref); 2874 Data.ReductionCopies.emplace_back(*IPriv); 2875 Data.ReductionOps.emplace_back(*IRed); 2876 LHSs.emplace_back(*ILHS); 2877 RHSs.emplace_back(*IRHS); 2878 std::advance(IPriv, 1); 2879 std::advance(IRed, 1); 2880 std::advance(ILHS, 1); 2881 std::advance(IRHS, 1); 2882 } 2883 } 2884 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( 2885 *this, S.getBeginLoc(), LHSs, RHSs, Data); 2886 // Build list of dependences. 2887 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) 2888 for (const Expr *IRef : C->varlists()) 2889 Data.Dependences.emplace_back(C->getDependencyKind(), IRef); 2890 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, 2891 CapturedRegion](CodeGenFunction &CGF, 2892 PrePostActionTy &Action) { 2893 // Set proper addresses for generated private copies. 2894 OMPPrivateScope Scope(CGF); 2895 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || 2896 !Data.LastprivateVars.empty()) { 2897 enum { PrivatesParam = 2, CopyFnParam = 3 }; 2898 llvm::Value *CopyFn = CGF.Builder.CreateLoad( 2899 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); 2900 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( 2901 CS->getCapturedDecl()->getParam(PrivatesParam))); 2902 // Map privates. 
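      // Schematically, the runtime-provided task copy function is invoked as
      //   copy_fn(privates_block, &priv1_ptr, &priv2_ptr, ...)
      // and fills each out-parameter with the address of the corresponding
      // copy inside the task's privates block; those addresses are then used
      // for the privatized variables below.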
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (const Expr *E : Data.PrivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (const Expr *E : Data.LastprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                          CopyFn, CallArgs);
      for (const auto &Pair : LastprivateDstsOrigs) {
        const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        Pair.second->getType(), VK_LValue,
                        Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    if (Data.Reductions) {
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
                             Data.ReductionOps);
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement =
            Address(CGF.EmitScalarConversion(
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                        CGF.getContext().getPointerType(
                            Data.ReductionCopies[Cnt]->getType()),
                        Data.ReductionCopies[Cnt]->getExprLoc()),
                    Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                         [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
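    // (in_reduction items are deferred to the block below: their replacement
    // addresses come from the enclosing taskgroup's reduction descriptor,
    // which must already have been privatized as an implicit firstprivate.)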
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const Expr *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate
        // and privatized already during processing of the firstprivates.
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        llvm::Value *ReductionsPtr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
                                 TaskgroupDescriptors[Cnt]->getExprLoc());
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                InRedPrivs[Cnt]->getExprLoc()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
                              [Replacement]() { return Replacement; });
      }
    }
    (void)InRedScope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  llvm::Value *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}

static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                           ImplicitParamDecl::Other);
  auto *OrigRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                              ImplicitParamDecl::Other);
  auto *PrivateRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  QualType ElemType = C.getBaseElementType(Ty);
  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
                                           ImplicitParamDecl::Other);
  auto *InitRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
  PrivateVD->setInitStyle(VarDecl::CInit);
  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
                                              InitRef, /*BasePath=*/nullptr,
                                              VK_RValue));
  Data.FirstprivateVars.emplace_back(OrigRef);
  Data.FirstprivateCopies.emplace_back(PrivateRef);
  Data.FirstprivateInits.emplace_back(InitRef);
  return OrigVD;
}

void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
    OMPTargetDataInfo &InputInfo) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  OMPTaskDataTy Data;
  // The task is not final.
  Data.Final.setInt(/*IntVal=*/false);
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      Data.FirstprivateVars.push_back(*IRef);
      Data.FirstprivateCopies.push_back(IInit);
      Data.FirstprivateInits.push_back(*IElemInitRef);
      ++IRef;
      ++IElemInitRef;
    }
  }
  OMPPrivateScope TargetScope(*this);
  VarDecl *BPVD = nullptr;
  VarDecl *PVD = nullptr;
  VarDecl *SVD = nullptr;
  if (InputInfo.NumberOfTargetItems > 0) {
    auto *CD = CapturedDecl::Create(
        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
    QualType BaseAndPointersType = getContext().getConstantArrayType(
        getContext().VoidPtrTy, ArrSize, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    BPVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
    PVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
    QualType SizesType = getContext().getConstantArrayType(
        getContext().getSizeType(), ArrSize, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
                                            S.getBeginLoc());
    TargetScope.addPrivate(
        BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
    TargetScope.addPrivate(PVD,
                           [&InputInfo]() { return InputInfo.PointersArray; });
    TargetScope.addPrivate(SVD,
                           [&InputInfo]() { return InputInfo.SizesArray; });
  }
  (void)TargetScope.Privatize();
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (const Expr *IRef : C->varlists())
      Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.FirstprivateVars.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                          CopyFn, CallArgs);
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    if (InputInfo.NumberOfTargetItems > 0) {
      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize());
      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize());
      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize());
    }

    Action.Enter(CGF);
    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
    BodyGen(CGF);
  };
  llvm::Value *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
      Data.NumberOfParts);
  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
  IntegerLiteral IfCond(getContext(), TrueOrFalse,
                        getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
                        SourceLocation());

  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
                                      SharedsTy, CapturedStruct, &IfCond, Data);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        auto IPriv = C->privates().begin();
        auto IRed = C->reduction_ops().begin();
        auto ILHS = C->lhs_exprs().begin();
        auto IRHS = C->rhs_exprs().begin();
        for (const Expr *Ref : C->varlists()) {
          Data.ReductionVars.emplace_back(Ref);
          Data.ReductionCopies.emplace_back(*IPriv);
          Data.ReductionOps.emplace_back(*IRed);
          LHSs.emplace_back(*ILHS);
          RHSs.emplace_back(*IRHS);
          std::advance(IPriv, 1);
          std::advance(IRed, 1);
          std::advance(ILHS, 1);
          std::advance(IRHS, 1);
        }
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
                                                           LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(
      *this,
      [&S]() -> ArrayRef<const Expr *> {
        if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
          return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                    FlushClause->varlist_end());
        return llvm::None;
      }(),
      S.getBeginLoc());
}

void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iteration count
  // each time it is needed (e.g., because it folds to a constant).
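  // Illustration (assumed example): for
  //   for (int i = 0; i < 8; ++i) ...
  // the trip count folds to the constant 8 and no count variable is needed,
  // whereas a runtime bound such as 'i < n' gets a dedicated variable
  // initialized once by the calculation emitted below.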
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    // Emit 'then' code.
    {
      // Emit helper variable initializations.

      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind()))
        EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const Expr *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getBeginLoc());
        }
      } else {
        // Default behaviour for dist_schedule clause.
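        // (A typical default is a static schedule with no chunk, giving each
        // team one roughly equal contiguous chunk; the actual choice is
        // target-specific and intentionally left to the runtime class.)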
        CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
            *this, S, ScheduleKind, Chunk);
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      bool StaticChunked = RT.isStaticChunked(
          ScheduleKind, /* Chunked */ Chunk != nullptr) &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr) ||
          StaticChunked) {
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
            LB.getAddress(), UB.getAddress(), ST.getAddress(),
            StaticChunked ? Chunk : nullptr);
        RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                    StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        const Expr *Cond =
            isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                ? S.getCombinedCond()
                : S.getCond();

        if (StaticChunked)
          Cond = S.getCombinedDistCond();

        // For static unchunked schedules generate:
        //
        //  1. For distribute alone, codegen
        //    while (idx <= UB) {
        //      BODY;
        //      ++idx;
        //    }
        //
        //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
        //    while (idx <= UB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      idx += ST;
        //    }
        //
        // For static chunked schedules generate:
        //
        //   while (IV <= GlobalUB) {
        //     <CodeGen rest of pragma>(LB, UB);
        //     LB += ST;
        //     UB += ST;
        //     UB = min(UB, GlobalUB);
        //     IV = LB;
        //   }
        //
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
                         [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                           CodeGenLoop(CGF, S, LoopExit);
                         },
                         [&S, StaticChunked](CodeGenFunction &CGF) {
                           if (StaticChunked) {
                             CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
                             CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
                             CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
                             CGF.EmitIgnoredExpr(S.getCombinedInit());
                           }
                         });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // the runtime and runs the inner loop to process it.
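        // Rough shape of the emitted control flow (hypothetical names; the
        // real runtime entry points are selected inside
        // EmitOMPDistributeOuterLoop):
        //   while (dispatch_next(&LB, &UB)) {
        //     for (IV = LB; IV <= UB; ++IV)
        //       BODY;
        //   }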
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind())) {
        EmitOMPReductionClauseFinal(S, OMPD_simd);
        // Emit post-update of the reduction variables if IsLastIter != 0.
        emitPostUpdateForReductionClause(
            *this, S, [IL, &S](CodeGenFunction &CGF) {
              return CGF.Builder.CreateIsNotNull(
                  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
            });
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}

void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
  Fn->setDoesNotRecurse();
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.getAssociatedStmt() &&
           "Ordered depend construct must not have an associated statement.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
}

static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                                   DestType, Loc)
                        : CGF.EmitComplexToScalarConversion(
                              Val.getComplexVal(), SrcType, DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    llvm::Value *ScalarVal = CGF.EmitScalarConversion(
        Val.getScalarVal(), SrcType, DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  } else {
    CGF.EmitAtomicStore(RVal, LVal,
                        IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                 : llvm::AtomicOrdering::Monotonic,
                        LVal.isVolatile(), /*IsInit=*/false);
  }
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(
                         XLValue, Loc,
                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                  : llvm::AtomicOrdering::Monotonic,
                         XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
}

static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomics are supported for the given
  // type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  llvm::Value *Res =
      CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> CommonGen) {
  // Update expressions are allowed to have the following forms:
  //  x binop= expr; -> xrval binop expr;
  //  x++, ++x -> xrval + 1;
  //  x--, --x -> xrval - 1;
  //  x = x binop expr; -> xrval binop expr;
  //  x = expr Op x; -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  //  x binop= expr; -> xrval binop expr;
  //  x++, ++x -> xrval + 1;
  //  x--, --x -> xrval - 1;
  //  x = x binop expr; -> xrval binop expr;
  //  x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  llvm::AtomicOrdering AO = IsSeqCst
                                ? llvm::AtomicOrdering::SequentiallyConsistent
                                : llvm::AtomicOrdering::Monotonic;
  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  llvm::AtomicOrdering AO = IsSeqCst
                                ? llvm::AtomicOrdering::SequentiallyConsistent
                                : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    //  x binop= expr; -> xrval binop expr;
    //  x++, ++x -> xrval + 1;
    //  x--, --x -> xrval - 1;
    //  x = x binop expr; -> xrval binop expr;
    //  x = expr Op x; -> expr binop xrval;
    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
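        // ('atomicrmw' returns the value 'x' held before the operation, so
        // for a postfix form such as '{v = x; x binop= expr;}' the captured
        // value is exactly that returned value.)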
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide the new value, so evaluate it using
        // the old value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of the old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    emitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    emitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    emitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    emitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
  case OMPC_unified_address:
  case OMPC_unified_shared_memory:
  case OMPC_reverse_offload:
  case OMPC_dynamic_allocators:
  case OMPC_atomic_default_mem_order:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find first clause (skip seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
  if (const auto *FE = dyn_cast<FullExpr>(CS))
    enterFullExpression(FE);
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const Stmt *C : Compound->body()) {
      if (const auto *FE = dyn_cast<FullExpr>(C))
        enterFullExpression(FE);
    }
  }

  auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.EmitStopPoint(CS);
    emitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
                      S.getV(), S.getExpr(), S.getUpdateExpr(),
                      S.isXLHSInRHSPart(), S.getBeginLoc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // On device emit this construct as inlined code.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
        });
    return;
  }

  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the (at most one) 'if' clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  auto &&SizeEmitter = [](CodeGenFunction &CGF, const OMPLoopDirective &D) {
    // Name the scope so the loop pre-init statements stay live while the
    // iteration count is emitted (a bare temporary would be destroyed
    // immediately).
    OMPLoopScope PreInitScope(CGF, D);
    // Emit calculation of the iterations count.
    llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
    NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
                                              /*IsSigned=*/false);
    return NumIterations;
  };
  CGM.getOpenMPRuntime().emitTargetNumIterationsCall(CGF, S, Device,
                                                     SizeEmitter);
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device);
}

static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  llvm::Value *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsRegion(CodeGenFunction &CGF,
                                  PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
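  // Illustration (assumed example): for
  //   #pragma omp target teams distribute
  //   for (int i = 0; i < n; ++i) ...
  // the lambda below becomes the body of the outlined teams function and the
  // distribute loop above splits the iteration space among the teams.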
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {

  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
                              CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
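// Illustrative source form for the combined 'target teams distribute
// parallel for' lowering below (example only):
//
//   #pragma omp target teams distribute parallel for
//   for (int i = 0; i < n; ++i)
//     body(i);
//
// The distribute loop is emitted with emitInnerParallelForWhenCombined, so
// each chunk assigned to a team is executed by an inner 'parallel for'.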
static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  // Emit SPMD target teams distribute parallel for region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
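// The 'simd' variant of the combined construct is lowered below; it differs
// from the non-simd form mainly in the innermost directive kind passed to
// emitCommonOMPTeamsDirective. Illustrative source form (example only):
//
//   #pragma omp target teams distribute parallel for simd
//   for (int i = 0; i < n; ++i)
//     body(i);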
static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  // Emit SPMD target teams distribute parallel for simd region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
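// Cancellation handling follows. Illustrative source form (example only):
//
//   #pragma omp parallel
//   {
//     #pragma omp cancellation point parallel
//     if (err)
//       #pragma omp cancel parallel if(cancel : do_cancel)
//   }
//
// For 'cancel', only an 'if' clause with no name modifier or with the
// 'cancel' modifier guards the runtime call.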
void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}
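// Illustrative use of 'use_device_ptr', which EmitOMPUseDevicePtrClause and
// the 'target data' emission below cooperate to lower (example only;
// launch_kernel is a placeholder):
//
//   int *p = ...;
//   #pragma omp target data map(to : p[0 : n]) use_device_ptr(p)
//   {
//     // Within the region, 'p' is privatized to the device address obtained
//     // from the runtime.
//     launch_kernel(p);
//   }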
void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (const Expr *PvtVarIt : C.private_copies()) {
    const auto *OrigVD =
        cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // an OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD,
                                                         InitAddrIt, InitVD,
                                                         PvtVD]() {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // that the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it is initialized from the
      // initialization variable we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable has served its purpose in the emission
      // of the previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}

// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code generation
  // to deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers
      // if needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inlined scope, because changes to references made inside
    // the region are expected to be visible outside it, so we do not
    // privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
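// The standalone data-mapping directives below share one shape: bail out when
// no device triples are configured, pick up optional 'if' and 'device'
// clauses, and forward everything to the runtime. Illustrative source forms
// (examples only):
//
//   #pragma omp target enter data map(to : a[0 : n]) if(n > 0) device(0)
//   ...
//   #pragma omp target exit data map(from : a[0 : n])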
void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
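// Illustrative source form for the combined 'target parallel for' lowering
// below (example only):
//
//   #pragma omp target parallel for
//   for (int i = 0; i < n; ++i)
//     body(i);
//
// An OMPCancelStackRAII is installed so that a '#pragma omp cancel for'
// inside the loop has a valid exit block.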
static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with a 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with a 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

/// Map the given helper variable to the address of the matching implicit
/// parameter of the outlined task function.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  const auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(VDecl,
                      [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
}
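// Illustrative source forms for the taskloop emission below (examples only):
//
//   #pragma omp taskloop grainsize(64)
//   for (int i = 0; i < n; ++i)
//     body(i);
//
//   #pragma omp taskloop num_tasks(8) nogroup
//   for (int i = 0; i < n; ++i)
//     body(i);
//
// 'grainsize' and 'num_tasks' are mutually exclusive; the int flag in
// Data.Schedule records which of the two was supplied. Without 'nogroup',
// the taskloop is wrapped in an implicit taskgroup region.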
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getBeginLoc())));
    }
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
                      &Data](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getBeginLoc());
  }
}
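// Both 'taskloop' and 'taskloop simd' reuse the task-loop-based emission
// above; the simd variant differs only where isOpenMPSimdDirective() is
// queried (simd loop metadata and lastprivate finalization). Illustrative
// source form (example only):
//
//   #pragma omp taskloop simd
//   for (int i = 0; i < n; ++i)
//     body(i);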
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
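// EmitSimpleOMPExecutableDirective below emits only the associated statement
// of a directive as an inlined region. For loop directives it first
// materializes loop counters that live in global storage, plus any extra
// counters introduced by an 'ordered(n)' clause deeper than the collapsed
// loop nest. Illustrative source form (example only):
//
//   #pragma omp for ordered(2)
//   for (int i = 0; i < n; ++i)
//     for (int j = 0; j < m; ++j) {
//       #pragma omp ordered depend(sink : i - 1, j)
//       body(i, j);
//       #pragma omp ordered depend(source)
//     }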
void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      OMPPrivateScope LoopGlobals(CGF);
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const Expr *E : LD->counters()) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(E);
            LoopGlobals.addPrivate(
                VD, [&GlobLVal]() { return GlobLVal.getAddress(); });
          }
          if (isa<OMPCapturedExprDecl>(VD)) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
          if (!C->getNumForLoops())
            continue;
          for (unsigned I = LD->getCollapsedNumber(),
                        E = C->getLoopNumIterations().size();
               I < E; ++I) {
            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
              // Emit only those that were not explicitly referenced in
              // clauses.
              if (!CGF.LocalDeclMap.count(VD))
                CGF.EmitVarDecl(*VD);
            }
          }
        }
      }
      LoopGlobals.Privatize();
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  OMPSimdLexicalScope Scope(*this, D);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this,
      isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                  : D.getDirectiveKind(),
      CodeGen);
}