//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};
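
// Illustrative note (a sketch, not from the original source): for a directive
// such as
//   #pragma omp parallel num_threads(N + 1)
// the clause expression 'N + 1' is typically captured by Sema into a helper
// declaration attached to the clause as a pre-init statement;
// emitPreInitStmt() above emits such helpers before the region body, so later
// clause codegen can simply reference the already-evaluated value.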

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of used expressions from the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    CodeGenFunction::OMPMapVars PreCondVars;
    for (auto *E : S.counters()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      (void)PreCondVars.setVarAddr(
          CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
    }
    (void)PreCondVars.apply(CGF);
    if (auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E =
              TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress();
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      auto VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
                  : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
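
      // Illustration (a sketch, not from the original source): given
      //   int x = 0;
      //   #pragma omp parallel firstprivate(x)
      // the runtime expects pointer-sized arguments, so the 4-byte 'x' is
      // spilled to a uintptr_t-sized temporary below and reloaded as a
      // uintptr_t before being forwarded to the outlined function.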
      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(AddrLV.getAddress().getPointer(),
                                             Ctx.getUIntPtrType(),
                                             Ctx.getPointerType(DstType), Loc);
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references, we need to return the address of the
  // reference instead of the address of the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
  }

  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType()) {
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  }
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (auto *A = T->getAsArrayTypeUnsafe()) {
    if (auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    else if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly,
                           StringRef FunctionName)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName) {}
};
}

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getLocStart(),
        SourceLocation(), DeclarationName(), Ctx.VoidTy,
        Ctx.getTrivialTypeSourceInfo(
            Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)),
        SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. The VLA type sizes are passed to the outlined
    // function in the same way.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType()) {
      if (FO.UIntPtrCastRequired)
        ArgType = Ctx.getUIntPtrType();
    }

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &Ctx.Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getLocStart() : FD->getLocStart(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit the function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ?
            Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.S->getLocStart(), CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    // Do not map arguments if we emit the function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything;
    // just use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CGF.CreateMemTemp(
            CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
        CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
                              /*Volatile=*/false, CurVD->getType());
        LocalAddr = RefAddr;
      }
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      auto *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      auto VAT = FD->getCapturedVLAType();
      VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
        } else if (!VarTy->isVariablyModifiedType() ||
                   !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = CGF.EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
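      // Callee side of the pointer-size convention sketched above in
      // GenerateOpenMPCapturedVars (an illustrative note, not from the
      // original source): castValueFromUintptr() reinterprets the
      // pointer-sized argument back through a pointer to the field's
      // original type, so the value round-trips unchanged.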
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      LocalAddrs.insert(
          {Args[Cnt],
           {Var, FO.UIntPtrCastRequired
                     ? castValueFromUintptr(CGF, I->getLocation(),
                                            FD->getType(), Args[Cnt]->getName(),
                                            ArgLVal, VarTy->isReferenceType())
                     : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() &&
      CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str());
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      setAddrOfLocalVar(LocalAddrPair.second.first,
                        LocalAddrPair.second.second);
    }
  }
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;
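
  // Shape of the result when a debug wrapper is needed (an illustrative
  // sketch, not from the original source): the user body is emitted into
  // '<helper>_debug__' with the captures' natural types, and '<helper>'
  // becomes a thin wrapper using the pointer-sized argument convention that
  // forwards to it, conceptually:
  //   void helper_debug__(int &n, S &s) { /* user body */ }
  //   void helper(uintptr_t n, S *s) {
  //     helper_debug__(*reinterpret_cast<int *>(&n), *s);
  //   }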
  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName());
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end())
        CallArg = EI->second.second;
      else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
                                                  F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // We are working with a single array element, so we have to remap
            // the destination and source variables to the corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
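
  // Example (an illustrative note, not from the original source): in
  //   #pragma omp parallel for firstprivate(a) lastprivate(a)
  // 'a' appears in both clauses, so the firstprivate copy emitted here is the
  // same private instance the lastprivate machinery later copies back; the
  // return value of this function tells the caller that this interplay is
  // present.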
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function ('omp for', 'omp simd', 'omp distribute', etc.).
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal = EmitLValue(&DRE);
        Address OriginalAddr = OriginalLVal.getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(),
                                           Type);
              EmitAggregateAssign(Dest, OriginalLVal, Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap the temp VDInit variable to the address of the original
            // variable (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code
        // with TLS support, the address is passed from the master as a field
        // in the captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
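
        // Example (an illustrative note, not from the original source): for
        //   int t;
        //   #pragma omp threadprivate(t)
        //   #pragma omp parallel copyin(t)
        // each thread compares the address of its own 't' with the master's
        // copy below; distinct addresses mean this is a worker thread that
        // must perform the copy (followed by a barrier).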
        if (CopiedVars.size() == 1) {
          // First, check whether the current thread is the master thread. If
          // it is, there is no need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the
      // end of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable of a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
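        // Example (an illustrative note, not from the original source): for
        //   #pragma omp for lastprivate(i)
        // where 'i' is also the loop counter, the 'finals()' expression above
        // first recomputes i's value after the last iteration, and only then
        // is it copied back to the original 'i' below.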
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
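
  // Sketch of the scheme (an illustrative note, not from the original
  // source): for
  //   #pragma omp parallel reduction(+ : s)
  // Sema builds helper LHS/RHS variables so the combiner can be expressed as
  //   s_lhs = s_lhs + s_rhs;
  // Below, the LHS helper is bound to the original (shared) 's' and the RHS
  // helper to the freshly initialized private copy; the runtime later applies
  // the combiner across all threads' copies.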
  for (const auto *IRef : Shareds) {
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    auto Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
        return GetAddrOfLocalVar(PrivateVD);
      });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(
          LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (auto *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to the
/// outlined parallel function. This is necessary for combined constructs such
/// as 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace

static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute'
  // chunk's lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
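
// Overall shape of the lowering (an illustrative sketch, not from the
// original source): with the KMP-style runtime, '#pragma omp parallel'
// becomes roughly
//   __kmpc_fork_call(&loc, <num captures>, .omp_outlined., <captures>...);
// and an 'if' clause is lowered by the runtime layer to a branch between the
// forked call and a serialized invocation of the outlined function.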

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit implicit barrier to synchronize threads and avoid data races
      // when propagating the master thread's values of threadprivate
      // variables to the local instances in all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear;
  // there is no need to generate the code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (auto *U : C->updates())
        EmitIgnoredExpr(U);
    }
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      HasLinears = true;
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}

void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
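  // Example (an illustrative note, not from the original source): for
  //   #pragma omp simd linear(p : 2)
  // each 'finals()' expression below computes the value 'p' must have after
  // the loop (start + iteration-count * 2) and stores it back to the original
  // variable, optionally guarded by the condition produced by CondGen.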
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Emit var without initialization.

void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Emit var without initialization.
    auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
    EmitAutoVarCleanups(VarEmission);
    LocalDeclMap.erase(PrivateVD);
    (void)LoopScope.addPrivate(VD, [&VarEmission]() {
      return VarEmission.getAllocatedAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    } else {
      (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
        return VarEmission.getAllocatedAddress();
      });
    }
    ++I;
  }
}

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (auto I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Check that loop is executed at least one time.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else
        EmitVarDecl(*PrivateVD);
      ++CurPrivate;
    }
  }
}

static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(false);
  }
}
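
// Illustrative difference (assumed source): 'safelen(4)' only promises that
// iterations at distance less than 4 are dependence-free, so the loop is
// never marked parallel; 'simdlen(4)' merely requests a preferred vector
// width and, absent a 'safelen' clause, leaves the loop marked parallel when
// the schedule is not monotonic, as handled above.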

void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}

void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED) {
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      } else {
        DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}

/// Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
         "Expected simd directive");
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }
  //
  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
      isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
      isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
  }

  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
  bool CondConstant;
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
    if (!CondConstant)
      return;
  } else {
    auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
    ContBlock = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(ThenBlock);
    CGF.incrementProfileCounter(&S);
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGF.EmitOMPSimdInit(S);

  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    CGF.EmitOMPSimdFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(
        CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
  }
  CGF.EmitOMPLinearClauseFinal(
      S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}
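
// Illustrative example (assumed source): for
//   #pragma omp simd simdlen(8)
//   for (int i = 0; i < n; ++i) a[i] += b[i];
// emitOMPSimdRegion above emits one serial loop guarded by the precondition
// and tagged with llvm.loop vectorization metadata (enable + width 8),
// followed by the final copy-out of linear/lastprivate variables.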

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  auto &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    BoolCondVal =
        RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();

  // when 'distribute' is not combined with a 'for':
  // while (idx <= UB) { BODY; ++idx; }
  // when 'distribute' is combined with a 'for'
  // (e.g. 'distribute parallel for')
  // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
  EmitOMPInnerLoop(
      S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
      [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
        CodeGenLoop(CGF, S, LoopExit);
      },
      [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
        CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}

void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered =
      Ordered || RT.isDynamic(ScheduleKind.Schedule);

  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the
  // threads in the team in a round-robin fashion in the order of the thread
  // number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

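  // For example (illustrative source, not from this file):
  //   #pragma omp for schedule(dynamic, 4)
  //   for (int i = 0; i < N; ++i) body(i);
  // takes the DynamicOrOrdered path: dispatch init is emitted once with chunk
  // size 4, and the outer loop above repeatedly calls __kmpc_dispatch_next to
  // fetch the next [LB, UB] chunk until none remain.
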
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
                                                              LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DispatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);
  }

  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}

static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}

void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {

  auto &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
  // dynamic.
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);

  // For combined 'distribute' and 'for', the increment expression of
  // 'distribute' is stored in DistInc. For 'distribute' alone, it is in Inc.
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();

  // This routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive.
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}
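
// Illustrative example (assumed source): in
//   #pragma omp distribute parallel for
// the 'distribute' schedule hands each team a [PrevLB, PrevUB] chunk of the
// iteration space; the helper below copies those bounds into the LB/UB of
// the inner 'parallel for', so its worksharing loop covers only the team's
// chunk rather than the whole loop.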

static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into the current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevUpperBoundVariable()->getExprLoc());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}

/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
/// we need to use the LB and UB expressions generated by the worksharing
/// code generation support, whereas in non-combined situations we would
/// just emit 0 and the LastIteration expression.
/// This function is necessary due to the difference of the LB and UB
/// types for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // When implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
  // is not normalized as each team only executes its own assigned
  // distribute chunk.
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal =
      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getLocStart());
  llvm::Value *UBVal =
      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getLocStart());
  return {LBVal, UBVal};
}

static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  auto LBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));

  auto UBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}

static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &Action) {
    Action.Enter(CGF);
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S,
      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}

void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  // Emit SPMD target simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace
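
// Illustrative example (assumed source): for
//   #pragma omp for simd schedule(monotonic: dynamic, 8)
// EmitOMPWorksharingLoop below derives IsMonotonic = true from the schedule
// modifier, which both selects the outer dispatch loop and keeps the
// generated simd loop from being marked parallel in EmitOMPSimdInit.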

bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    bool Ordered = false;
    if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S);
      else
        Ordered = true;
    }

    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      if (RT.isStaticNonchunked(ScheduleKind.Schedule,
                                /* Chunked */ Chunk != nullptr) &&
          !Ordered) {
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided
        // into chunks that are approximately equal in size, and at most one
        // chunk is distributed to each thread. Note that the size of the
        // chunks is unspecified in this case.
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
            UB.getAddress(), ST.getAddress());
        RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
                             ScheduleKind, StaticInit);
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                         S.getDirectiveKind());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        const bool IsMonotonic =
            Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
            ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
            ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
                                             ST.getAddress(), IL.getAddress(),
                                             Chunk, EUB);
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S,
                         [&](CodeGenFunction &CGF) -> llvm::Value * {
                           return CGF.Builder.CreateIsNotNull(
                               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
                         });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getLocStart()));
    });
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
  return HasLastprivateClause;
}

/// The following two functions generate expressions for the loop lower
/// and upper bounds in case of static and dynamic (dispatch) schedule
/// of the associated 'for' or 'distribute' loop.
static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  return {LB, UB};
}

/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
/// consider the lower and upper bound expressions generated by the
/// worksharing loop support, but we use 0 and the iteration space size as
/// constants.
static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                          Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
  return {LBVal, UBVal};
}
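
// Illustrative contrast (assumed): for '#pragma omp for schedule(static)'
// emitForLoopBounds yields LB/UB helper variables whose addresses are passed
// to the static-init runtime call, while for '#pragma omp for
// schedule(dynamic)' emitDispatchForLoopBounds yields the scalar pair
// (0, LastIteration) passed by value to the dispatch-init call.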

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  };
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  };
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
  return LVal;
}

void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *Stmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  const auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart(), true);
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getLocStart()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of
      // lastprivate variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_sections);
  }
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
                                              S.hasCancel());
}
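
// Illustrative example (assumed source): for
//   #pragma omp single copyprivate(x)
// the value of 'x' produced by the thread that executes the single region is
// broadcast to the other threads using the <destination> = <source> helper
// expressions collected in EmitOMPSingleDirective below.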

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination> = <source> expressions).
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid data race on firstprivate
  // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getLocStart(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  Expr *Hint = nullptr;
  if (auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getLocStart(), Hint);
}

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitSections(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
                                 emitEmptyBoundParameters);
}
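
// Illustrative example (assumed source): a directive such as
//   #pragma omp task firstprivate(a) in_reduction(+: r)
// flows through EmitOMPTaskBasedDirective below: the clause lists are
// gathered into Data, the task body is outlined, and TaskGen finally emits
// the runtime call that creates the task.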

void CodeGenFunction::EmitOMPTaskBasedDirective(
    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
    OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  auto *TaskT = std::next(I, 4);
  // Check if the task is final.
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    auto *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >= 0 for untied tasks).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Data.ReductionVars.emplace_back(Ref);
      Data.ReductionCopies.emplace_back(*IPriv);
      Data.ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getLocStart(), LHSs, RHSs, Data);
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (auto *IRef : C->varlists())
      Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
                    CapturedRegion](CodeGenFunction &CGF,
                                    PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      auto *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
      // Map privates.
2842 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2843 llvm::SmallVector<llvm::Value *, 16> CallArgs;
2844 CallArgs.push_back(PrivatesPtr);
2845 for (auto *E : Data.PrivateVars) {
2846 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2847 Address PrivatePtr = CGF.CreateMemTemp(
2848 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2849 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2850 CallArgs.push_back(PrivatePtr.getPointer());
2851 }
2852 for (auto *E : Data.FirstprivateVars) {
2853 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2854 Address PrivatePtr =
2855 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2856 ".firstpriv.ptr.addr");
2857 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2858 CallArgs.push_back(PrivatePtr.getPointer());
2859 }
2860 for (auto *E : Data.LastprivateVars) {
2861 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2862 Address PrivatePtr =
2863 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2864 ".lastpriv.ptr.addr");
2865 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2866 CallArgs.push_back(PrivatePtr.getPointer());
2867 }
2868 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
2869 CopyFn, CallArgs);
2870 for (auto &&Pair : LastprivateDstsOrigs) {
2871 auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2872 DeclRefExpr DRE(
2873 const_cast<VarDecl *>(OrigVD),
2874 /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2875 OrigVD) != nullptr,
2876 Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2877 Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2878 return CGF.EmitLValue(&DRE).getAddress();
2879 });
2880 }
2881 for (auto &&Pair : PrivatePtrs) {
2882 Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2883 CGF.getContext().getDeclAlign(Pair.first));
2884 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2885 }
2886 }
2887 if (Data.Reductions) {
2888 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
2889 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2890 Data.ReductionOps);
2891 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2892 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2893 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2894 RedCG.emitSharedLValue(CGF, Cnt);
2895 RedCG.emitAggregateType(CGF, Cnt);
2896 // FIXME: This must be removed once the runtime library is fixed.
2897 // Emit required threadprivate variables for
2898 // initializer/combiner/finalizer.
2899 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2900 RedCG, Cnt);
2901 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2902 CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2903 Replacement =
2904 Address(CGF.EmitScalarConversion(
2905 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2906 CGF.getContext().getPointerType(
2907 Data.ReductionCopies[Cnt]->getType()),
2908 Data.ReductionCopies[Cnt]->getExprLoc()),
2909 Replacement.getAlignment());
2910 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2911 Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2912 [Replacement]() { return Replacement; });
2913 }
2914 }
2915 // Privatize all private variables except for in_reduction items.
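// (In_reduction items are handled separately below: their replacement
// addresses come from the taskgroup reduction descriptor via
// getTaskReductionItem rather than from the task's private copies.)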
2916 (void)Scope.Privatize();
2917 SmallVector<const Expr *, 4> InRedVars;
2918 SmallVector<const Expr *, 4> InRedPrivs;
2919 SmallVector<const Expr *, 4> InRedOps;
2920 SmallVector<const Expr *, 4> TaskgroupDescriptors;
2921 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
2922 auto IPriv = C->privates().begin();
2923 auto IRed = C->reduction_ops().begin();
2924 auto ITD = C->taskgroup_descriptors().begin();
2925 for (const auto *Ref : C->varlists()) {
2926 InRedVars.emplace_back(Ref);
2927 InRedPrivs.emplace_back(*IPriv);
2928 InRedOps.emplace_back(*IRed);
2929 TaskgroupDescriptors.emplace_back(*ITD);
2930 std::advance(IPriv, 1);
2931 std::advance(IRed, 1);
2932 std::advance(ITD, 1);
2933 }
2934 }
2935 // Privatize in_reduction items here, because taskgroup descriptors must be
2936 // privatized earlier.
2937 OMPPrivateScope InRedScope(CGF);
2938 if (!InRedVars.empty()) {
2939 ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
2940 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
2941 RedCG.emitSharedLValue(CGF, Cnt);
2942 RedCG.emitAggregateType(CGF, Cnt);
2943 // The taskgroup descriptor variable is always implicit firstprivate and
2944 // privatized already during processing of the firstprivates.
2945 // FIXME: This must be removed once the runtime library is fixed.
2946 // Emit required threadprivate variables for
2947 // initializer/combiner/finalizer.
2948 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2949 RedCG, Cnt);
2950 llvm::Value *ReductionsPtr =
2951 CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
2952 TaskgroupDescriptors[Cnt]->getExprLoc());
2953 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2954 CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2955 Replacement = Address(
2956 CGF.EmitScalarConversion(
2957 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2958 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
2959 InRedPrivs[Cnt]->getExprLoc()),
2960 Replacement.getAlignment());
2961 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2962 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
2963 [Replacement]() { return Replacement; });
2964 }
2965 }
2966 (void)InRedScope.Privatize();
2967
2968 Action.Enter(CGF);
2969 BodyGen(CGF);
2970 };
2971 auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2972 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
2973 Data.NumberOfParts);
2974 OMPLexicalScope Scope(*this, S);
2975 TaskGen(*this, OutlinedFn, Data);
2976 }
2977
2978 static ImplicitParamDecl *
2979 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
2980 QualType Ty, CapturedDecl *CD,
2981 SourceLocation Loc) {
2982 auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
2983 ImplicitParamDecl::Other);
2984 auto *OrigRef = DeclRefExpr::Create(
2985 C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
2986 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
2987 auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
2988 ImplicitParamDecl::Other);
2989 auto *PrivateRef = DeclRefExpr::Create(
2990 C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
2991 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
2992 QualType ElemType = C.getBaseElementType(Ty);
2993 auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
2994 ImplicitParamDecl::Other);
2995 auto *InitRef = DeclRefExpr::Create(
2996 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, 2997 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); 2998 PrivateVD->setInitStyle(VarDecl::CInit); 2999 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, 3000 InitRef, /*BasePath=*/nullptr, 3001 VK_RValue)); 3002 Data.FirstprivateVars.emplace_back(OrigRef); 3003 Data.FirstprivateCopies.emplace_back(PrivateRef); 3004 Data.FirstprivateInits.emplace_back(InitRef); 3005 return OrigVD; 3006 } 3007 3008 void CodeGenFunction::EmitOMPTargetTaskBasedDirective( 3009 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, 3010 OMPTargetDataInfo &InputInfo) { 3011 // Emit outlined function for task construct. 3012 auto CS = S.getCapturedStmt(OMPD_task); 3013 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 3014 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 3015 auto *I = CS->getCapturedDecl()->param_begin(); 3016 auto *PartId = std::next(I); 3017 auto *TaskT = std::next(I, 4); 3018 OMPTaskDataTy Data; 3019 // The task is not final. 3020 Data.Final.setInt(/*IntVal=*/false); 3021 // Get list of firstprivate variables. 3022 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 3023 auto IRef = C->varlist_begin(); 3024 auto IElemInitRef = C->inits().begin(); 3025 for (auto *IInit : C->private_copies()) { 3026 Data.FirstprivateVars.push_back(*IRef); 3027 Data.FirstprivateCopies.push_back(IInit); 3028 Data.FirstprivateInits.push_back(*IElemInitRef); 3029 ++IRef; 3030 ++IElemInitRef; 3031 } 3032 } 3033 OMPPrivateScope TargetScope(*this); 3034 VarDecl *BPVD = nullptr; 3035 VarDecl *PVD = nullptr; 3036 VarDecl *SVD = nullptr; 3037 if (InputInfo.NumberOfTargetItems > 0) { 3038 auto *CD = CapturedDecl::Create( 3039 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); 3040 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); 3041 QualType BaseAndPointersType = getContext().getConstantArrayType( 3042 getContext().VoidPtrTy, ArrSize, ArrayType::Normal, 3043 /*IndexTypeQuals=*/0); 3044 BPVD = createImplicitFirstprivateForType( 3045 getContext(), Data, BaseAndPointersType, CD, S.getLocStart()); 3046 PVD = createImplicitFirstprivateForType( 3047 getContext(), Data, BaseAndPointersType, CD, S.getLocStart()); 3048 QualType SizesType = getContext().getConstantArrayType( 3049 getContext().getSizeType(), ArrSize, ArrayType::Normal, 3050 /*IndexTypeQuals=*/0); 3051 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, 3052 S.getLocStart()); 3053 TargetScope.addPrivate( 3054 BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; }); 3055 TargetScope.addPrivate(PVD, 3056 [&InputInfo]() { return InputInfo.PointersArray; }); 3057 TargetScope.addPrivate(SVD, 3058 [&InputInfo]() { return InputInfo.SizesArray; }); 3059 } 3060 (void)TargetScope.Privatize(); 3061 // Build list of dependences. 3062 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) 3063 for (auto *IRef : C->varlists()) 3064 Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); 3065 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, 3066 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { 3067 // Set proper addresses for generated private copies. 
3068 OMPPrivateScope Scope(CGF); 3069 if (!Data.FirstprivateVars.empty()) { 3070 enum { PrivatesParam = 2, CopyFnParam = 3 }; 3071 auto *CopyFn = CGF.Builder.CreateLoad( 3072 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); 3073 auto *PrivatesPtr = CGF.Builder.CreateLoad( 3074 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); 3075 // Map privates. 3076 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 3077 llvm::SmallVector<llvm::Value *, 16> CallArgs; 3078 CallArgs.push_back(PrivatesPtr); 3079 for (auto *E : Data.FirstprivateVars) { 3080 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3081 Address PrivatePtr = 3082 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 3083 ".firstpriv.ptr.addr"); 3084 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 3085 CallArgs.push_back(PrivatePtr.getPointer()); 3086 } 3087 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), 3088 CopyFn, CallArgs); 3089 for (auto &&Pair : PrivatePtrs) { 3090 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 3091 CGF.getContext().getDeclAlign(Pair.first)); 3092 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 3093 } 3094 } 3095 // Privatize all private variables except for in_reduction items. 3096 (void)Scope.Privatize(); 3097 if (InputInfo.NumberOfTargetItems > 0) { 3098 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( 3099 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize()); 3100 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( 3101 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize()); 3102 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( 3103 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize()); 3104 } 3105 3106 Action.Enter(CGF); 3107 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); 3108 BodyGen(CGF); 3109 }; 3110 auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 3111 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, 3112 Data.NumberOfParts); 3113 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0); 3114 IntegerLiteral IfCond(getContext(), TrueOrFalse, 3115 getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 3116 SourceLocation()); 3117 3118 CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), S, OutlinedFn, 3119 SharedsTy, CapturedStruct, &IfCond, Data); 3120 } 3121 3122 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 3123 // Emit outlined function for task construct. 3124 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); 3125 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 3126 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 3127 const Expr *IfCond = nullptr; 3128 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3129 if (C->getNameModifier() == OMPD_unknown || 3130 C->getNameModifier() == OMPD_task) { 3131 IfCond = C->getCondition(); 3132 break; 3133 } 3134 } 3135 3136 OMPTaskDataTy Data; 3137 // Check if we should emit tied or untied task. 
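// E.g. '#pragma omp task untied' yields Data.Tied == false; without the
// 'untied' clause the task is tied.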
3138 Data.Tied = !S.getSingleClause<OMPUntiedClause>(); 3139 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 3140 CGF.EmitStmt(CS->getCapturedStmt()); 3141 }; 3142 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 3143 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 3144 const OMPTaskDataTy &Data) { 3145 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn, 3146 SharedsTy, CapturedStruct, IfCond, 3147 Data); 3148 }; 3149 EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data); 3150 } 3151 3152 void CodeGenFunction::EmitOMPTaskyieldDirective( 3153 const OMPTaskyieldDirective &S) { 3154 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); 3155 } 3156 3157 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 3158 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); 3159 } 3160 3161 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 3162 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart()); 3163 } 3164 3165 void CodeGenFunction::EmitOMPTaskgroupDirective( 3166 const OMPTaskgroupDirective &S) { 3167 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3168 Action.Enter(CGF); 3169 if (const Expr *E = S.getReductionRef()) { 3170 SmallVector<const Expr *, 4> LHSs; 3171 SmallVector<const Expr *, 4> RHSs; 3172 OMPTaskDataTy Data; 3173 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { 3174 auto IPriv = C->privates().begin(); 3175 auto IRed = C->reduction_ops().begin(); 3176 auto ILHS = C->lhs_exprs().begin(); 3177 auto IRHS = C->rhs_exprs().begin(); 3178 for (const auto *Ref : C->varlists()) { 3179 Data.ReductionVars.emplace_back(Ref); 3180 Data.ReductionCopies.emplace_back(*IPriv); 3181 Data.ReductionOps.emplace_back(*IRed); 3182 LHSs.emplace_back(*ILHS); 3183 RHSs.emplace_back(*IRHS); 3184 std::advance(IPriv, 1); 3185 std::advance(IRed, 1); 3186 std::advance(ILHS, 1); 3187 std::advance(IRHS, 1); 3188 } 3189 } 3190 llvm::Value *ReductionDesc = 3191 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(), 3192 LHSs, RHSs, Data); 3193 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3194 CGF.EmitVarDecl(*VD); 3195 CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD), 3196 /*Volatile=*/false, E->getType()); 3197 } 3198 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 3199 }; 3200 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3201 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); 3202 } 3203 3204 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 3205 CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { 3206 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) { 3207 return llvm::makeArrayRef(FlushClause->varlist_begin(), 3208 FlushClause->varlist_end()); 3209 } 3210 return llvm::None; 3211 }(), S.getLocStart()); 3212 } 3213 3214 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, 3215 const CodeGenLoopTy &CodeGenLoop, 3216 Expr *IncExpr) { 3217 // Emit the loop iteration variable. 3218 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 3219 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 3220 EmitVarDecl(*IVDecl); 3221 3222 // Emit the iterations count variable. 3223 // If it is not a variable, Sema decided to calculate iterations count on each 3224 // iteration (e.g., it is foldable into a constant). 
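// E.g. for 'for (int i = 0; i < N; ++i)' the trip count depends on 'N', so
// it is emitted into a dedicated variable below; with a constant bound the
// expression folds and no variable is needed.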
3225 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 3226 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 3227 // Emit calculation of the iterations count. 3228 EmitIgnoredExpr(S.getCalcLastIteration()); 3229 } 3230 3231 auto &RT = CGM.getOpenMPRuntime(); 3232 3233 bool HasLastprivateClause = false; 3234 // Check pre-condition. 3235 { 3236 OMPLoopScope PreInitScope(*this, S); 3237 // Skip the entire loop if we don't meet the precondition. 3238 // If the condition constant folds and can be elided, avoid emitting the 3239 // whole loop. 3240 bool CondConstant; 3241 llvm::BasicBlock *ContBlock = nullptr; 3242 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 3243 if (!CondConstant) 3244 return; 3245 } else { 3246 auto *ThenBlock = createBasicBlock("omp.precond.then"); 3247 ContBlock = createBasicBlock("omp.precond.end"); 3248 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 3249 getProfileCount(&S)); 3250 EmitBlock(ThenBlock); 3251 incrementProfileCounter(&S); 3252 } 3253 3254 emitAlignedClause(*this, S); 3255 // Emit 'then' code. 3256 { 3257 // Emit helper vars inits. 3258 3259 LValue LB = EmitOMPHelperVar( 3260 *this, cast<DeclRefExpr>( 3261 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3262 ? S.getCombinedLowerBoundVariable() 3263 : S.getLowerBoundVariable()))); 3264 LValue UB = EmitOMPHelperVar( 3265 *this, cast<DeclRefExpr>( 3266 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3267 ? S.getCombinedUpperBoundVariable() 3268 : S.getUpperBoundVariable()))); 3269 LValue ST = 3270 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 3271 LValue IL = 3272 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 3273 3274 OMPPrivateScope LoopScope(*this); 3275 if (EmitOMPFirstprivateClause(S, LoopScope)) { 3276 // Emit implicit barrier to synchronize threads and avoid data races 3277 // on initialization of firstprivate variables and post-update of 3278 // lastprivate variables. 3279 CGM.getOpenMPRuntime().emitBarrierCall( 3280 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 3281 /*ForceSimpleCall=*/true); 3282 } 3283 EmitOMPPrivateClause(S, LoopScope); 3284 if (isOpenMPSimdDirective(S.getDirectiveKind()) && 3285 !isOpenMPParallelDirective(S.getDirectiveKind()) && 3286 !isOpenMPTeamsDirective(S.getDirectiveKind())) 3287 EmitOMPReductionClauseInit(S, LoopScope); 3288 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 3289 EmitOMPPrivateLoopCounters(S, LoopScope); 3290 (void)LoopScope.Privatize(); 3291 3292 // Detect the distribute schedule kind and chunk. 3293 llvm::Value *Chunk = nullptr; 3294 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; 3295 if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) { 3296 ScheduleKind = C->getDistScheduleKind(); 3297 if (const auto *Ch = C->getChunkSize()) { 3298 Chunk = EmitScalarExpr(Ch); 3299 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 3300 S.getIterationVariable()->getType(), 3301 S.getLocStart()); 3302 } 3303 } 3304 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 3305 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 3306 3307 // OpenMP [2.10.8, distribute Construct, Description] 3308 // If dist_schedule is specified, kind must be static. If specified, 3309 // iterations are divided into chunks of size chunk_size, chunks are 3310 // assigned to the teams of the league in a round-robin fashion in the 3311 // order of the team number. 
When no chunk_size is specified, the
3312 // iteration space is divided into chunks that are approximately equal
3313 // in size, and at most one chunk is distributed to each team of the
3314 // league. The size of the chunks is unspecified in this case.
3315 if (RT.isStaticNonchunked(ScheduleKind,
3316 /* Chunked */ Chunk != nullptr)) {
3317 if (isOpenMPSimdDirective(S.getDirectiveKind()))
3318 EmitOMPSimdInit(S, /*IsMonotonic=*/true);
3319 CGOpenMPRuntime::StaticRTInput StaticInit(
3320 IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
3321 LB.getAddress(), UB.getAddress(), ST.getAddress());
3322 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
3323 StaticInit);
3324 auto LoopExit =
3325 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3326 // UB = min(UB, GlobalUB);
3327 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3328 ? S.getCombinedEnsureUpperBound()
3329 : S.getEnsureUpperBound());
3330 // IV = LB;
3331 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3332 ? S.getCombinedInit()
3333 : S.getInit());
3334
3335 Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3336 ? S.getCombinedCond()
3337 : S.getCond();
3338
3339 // for distribute alone, codegen
3340 // while (idx <= UB) { BODY; ++idx; }
3341 // when combined with 'for' (e.g. as in 'distribute parallel for')
3342 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
3343 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
3344 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
3345 CodeGenLoop(CGF, S, LoopExit);
3346 },
3347 [](CodeGenFunction &) {});
3348 EmitBlock(LoopExit.getBlock());
3349 // Tell the runtime we are done.
3350 RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
3351 } else {
3352 // Emit the outer loop, which requests its work chunk [LB..UB] from
3353 // the runtime and runs the inner loop to process it.
3354 const OMPLoopArguments LoopArguments = {
3355 LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
3356 Chunk};
3357 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
3358 CodeGenLoop);
3359 }
3360 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3361 EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
3362 return CGF.Builder.CreateIsNotNull(
3363 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
3364 });
3365 }
3366 if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
3367 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
3368 !isOpenMPTeamsDirective(S.getDirectiveKind())) {
3369 OpenMPDirectiveKind ReductionKind = OMPD_unknown;
3370 if (isOpenMPParallelDirective(S.getDirectiveKind()) &&
3371 isOpenMPSimdDirective(S.getDirectiveKind())) {
3372 ReductionKind = OMPD_parallel_for_simd;
3373 } else if (isOpenMPParallelDirective(S.getDirectiveKind())) {
3374 ReductionKind = OMPD_parallel_for;
3375 } else if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3376 ReductionKind = OMPD_simd;
3377 } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) &&
3378 S.hasClausesOfKind<OMPReductionClause>()) {
3379 llvm_unreachable(
3380 "No reduction clauses are allowed in distribute directive.");
3381 }
3382 EmitOMPReductionClauseFinal(S, ReductionKind);
3383 // Emit post-update of the reduction variables if IsLastIter != 0.
3384 emitPostUpdateForReductionClause( 3385 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 3386 return CGF.Builder.CreateIsNotNull( 3387 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 3388 }); 3389 } 3390 // Emit final copy of the lastprivate variables if IsLastIter != 0. 3391 if (HasLastprivateClause) { 3392 EmitOMPLastprivateClauseFinal( 3393 S, /*NoFinals=*/false, 3394 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 3395 } 3396 } 3397 3398 // We're now done with the loop, so jump to the continuation block. 3399 if (ContBlock) { 3400 EmitBranch(ContBlock); 3401 EmitBlock(ContBlock, true); 3402 } 3403 } 3404 } 3405 3406 void CodeGenFunction::EmitOMPDistributeDirective( 3407 const OMPDistributeDirective &S) { 3408 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3409 3410 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 3411 }; 3412 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3413 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); 3414 } 3415 3416 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 3417 const CapturedStmt *S) { 3418 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 3419 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 3420 CGF.CapturedStmtInfo = &CapStmtInfo; 3421 auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); 3422 Fn->setDoesNotRecurse(); 3423 return Fn; 3424 } 3425 3426 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 3427 if (S.hasClausesOfKind<OMPDependClause>()) { 3428 assert(!S.getAssociatedStmt() && 3429 "No associated statement must be in ordered depend construct."); 3430 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) 3431 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); 3432 return; 3433 } 3434 auto *C = S.getSingleClause<OMPSIMDClause>(); 3435 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, 3436 PrePostActionTy &Action) { 3437 const CapturedStmt *CS = S.getInnermostCapturedStmt(); 3438 if (C) { 3439 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3440 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3441 auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); 3442 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), 3443 OutlinedFn, CapturedVars); 3444 } else { 3445 Action.Enter(CGF); 3446 CGF.EmitStmt(CS->getCapturedStmt()); 3447 } 3448 }; 3449 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3450 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); 3451 } 3452 3453 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 3454 QualType SrcType, QualType DestType, 3455 SourceLocation Loc) { 3456 assert(CGF.hasScalarEvaluationKind(DestType) && 3457 "DestType must have scalar evaluation kind."); 3458 assert(!Val.isAggregate() && "Must be a scalar or complex."); 3459 return Val.isScalar() 3460 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, 3461 Loc) 3462 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 3463 DestType, Loc); 3464 } 3465 3466 static CodeGenFunction::ComplexPairTy 3467 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 3468 QualType DestType, SourceLocation Loc) { 3469 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 3470 "DestType must have complex evaluation kind."); 3471 CodeGenFunction::ComplexPairTy ComplexVal; 3472 if (Val.isScalar()) { 3473 // Convert the input element to the element type of the complex. 
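// E.g. a 'double' source converted to '_Complex float' becomes the pair
// ((float)val, 0.0f): the converted scalar feeds the real part and the
// imaginary part is zero-filled.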
3474 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 3475 auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 3476 DestElementType, Loc); 3477 ComplexVal = CodeGenFunction::ComplexPairTy( 3478 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 3479 } else { 3480 assert(Val.isComplex() && "Must be a scalar or complex."); 3481 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 3482 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 3483 ComplexVal.first = CGF.EmitScalarConversion( 3484 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 3485 ComplexVal.second = CGF.EmitScalarConversion( 3486 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 3487 } 3488 return ComplexVal; 3489 } 3490 3491 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 3492 LValue LVal, RValue RVal) { 3493 if (LVal.isGlobalReg()) { 3494 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 3495 } else { 3496 CGF.EmitAtomicStore(RVal, LVal, 3497 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3498 : llvm::AtomicOrdering::Monotonic, 3499 LVal.isVolatile(), /*IsInit=*/false); 3500 } 3501 } 3502 3503 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, 3504 QualType RValTy, SourceLocation Loc) { 3505 switch (getEvaluationKind(LVal.getType())) { 3506 case TEK_Scalar: 3507 EmitStoreThroughLValue(RValue::get(convertToScalarValue( 3508 *this, RVal, RValTy, LVal.getType(), Loc)), 3509 LVal); 3510 break; 3511 case TEK_Complex: 3512 EmitStoreOfComplex( 3513 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, 3514 /*isInit=*/false); 3515 break; 3516 case TEK_Aggregate: 3517 llvm_unreachable("Must be a scalar or complex."); 3518 } 3519 } 3520 3521 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 3522 const Expr *X, const Expr *V, 3523 SourceLocation Loc) { 3524 // v = x; 3525 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 3526 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 3527 LValue XLValue = CGF.EmitLValue(X); 3528 LValue VLValue = CGF.EmitLValue(V); 3529 RValue Res = XLValue.isGlobalReg() 3530 ? CGF.EmitLoadOfLValue(XLValue, Loc) 3531 : CGF.EmitAtomicLoad( 3532 XLValue, Loc, 3533 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3534 : llvm::AtomicOrdering::Monotonic, 3535 XLValue.isVolatile()); 3536 // OpenMP, 2.12.6, atomic Construct 3537 // Any atomic construct with a seq_cst clause forces the atomically 3538 // performed operation to include an implicit flush operation without a 3539 // list. 3540 if (IsSeqCst) 3541 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3542 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); 3543 } 3544 3545 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 3546 const Expr *X, const Expr *E, 3547 SourceLocation Loc) { 3548 // x = expr; 3549 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 3550 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); 3551 // OpenMP, 2.12.6, atomic Construct 3552 // Any atomic construct with a seq_cst clause forces the atomically 3553 // performed operation to include an implicit flush operation without a 3554 // list. 
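// E.g. for '#pragma omp atomic write seq_cst' over 'x = expr;' this emits an
// atomic store of 'expr' into 'x', followed by the implicit flush below.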
3555 if (IsSeqCst)
3556 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3557 }
3558
3559 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
3560 RValue Update,
3561 BinaryOperatorKind BO,
3562 llvm::AtomicOrdering AO,
3563 bool IsXLHSInRHSPart) {
3564 auto &Context = CGF.CGM.getContext();
3565 // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
3566 // for 'x' is simple, and atomic operations are allowed for the given type
3567 // on the target platform.
3568 if (BO == BO_Comma || !Update.isScalar() ||
3569 !Update.getScalarVal()->getType()->isIntegerTy() ||
3570 !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
3571 (Update.getScalarVal()->getType() !=
3572 X.getAddress().getElementType())) ||
3573 !X.getAddress().getElementType()->isIntegerTy() ||
3574 !Context.getTargetInfo().hasBuiltinAtomic(
3575 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
3576 return std::make_pair(false, RValue::get(nullptr));
3577
3578 llvm::AtomicRMWInst::BinOp RMWOp;
3579 switch (BO) {
3580 case BO_Add:
3581 RMWOp = llvm::AtomicRMWInst::Add;
3582 break;
3583 case BO_Sub:
3584 if (!IsXLHSInRHSPart)
3585 return std::make_pair(false, RValue::get(nullptr));
3586 RMWOp = llvm::AtomicRMWInst::Sub;
3587 break;
3588 case BO_And:
3589 RMWOp = llvm::AtomicRMWInst::And;
3590 break;
3591 case BO_Or:
3592 RMWOp = llvm::AtomicRMWInst::Or;
3593 break;
3594 case BO_Xor:
3595 RMWOp = llvm::AtomicRMWInst::Xor;
3596 break;
3597 case BO_LT:
3598 RMWOp = X.getType()->hasSignedIntegerRepresentation()
3599 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
3600 : llvm::AtomicRMWInst::Max)
3601 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
3602 : llvm::AtomicRMWInst::UMax);
3603 break;
3604 case BO_GT:
3605 RMWOp = X.getType()->hasSignedIntegerRepresentation()
3606 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
3607 : llvm::AtomicRMWInst::Min)
3608 : (IsXLHSInRHSPart ?
llvm::AtomicRMWInst::UMax
3609 : llvm::AtomicRMWInst::UMin);
3610 break;
3611 case BO_Assign:
3612 RMWOp = llvm::AtomicRMWInst::Xchg;
3613 break;
3614 case BO_Mul:
3615 case BO_Div:
3616 case BO_Rem:
3617 case BO_Shl:
3618 case BO_Shr:
3619 case BO_LAnd:
3620 case BO_LOr:
3621 return std::make_pair(false, RValue::get(nullptr));
3622 case BO_PtrMemD:
3623 case BO_PtrMemI:
3624 case BO_LE:
3625 case BO_GE:
3626 case BO_EQ:
3627 case BO_NE:
3628 case BO_Cmp:
3629 case BO_AddAssign:
3630 case BO_SubAssign:
3631 case BO_AndAssign:
3632 case BO_OrAssign:
3633 case BO_XorAssign:
3634 case BO_MulAssign:
3635 case BO_DivAssign:
3636 case BO_RemAssign:
3637 case BO_ShlAssign:
3638 case BO_ShrAssign:
3639 case BO_Comma:
3640 llvm_unreachable("Unsupported atomic update operation");
3641 }
3642 auto *UpdateVal = Update.getScalarVal();
3643 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3644 UpdateVal = CGF.Builder.CreateIntCast(
3645 IC, X.getAddress().getElementType(),
3646 X.getType()->hasSignedIntegerRepresentation());
3647 }
3648 auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3649 return std::make_pair(true, RValue::get(Res));
3650 }
3651
3652 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3653 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3654 llvm::AtomicOrdering AO, SourceLocation Loc,
3655 const llvm::function_ref<RValue(RValue)> &CommonGen) {
3656 // Update expressions are allowed to have the following forms:
3657 // x binop= expr; -> xrval binop expr;
3658 // x++, ++x -> xrval + 1;
3659 // x--, --x -> xrval - 1;
3660 // x = x binop expr; -> xrval binop expr
3661 // x = expr Op x; -> expr binop xrval;
3662 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3663 if (!Res.first) {
3664 if (X.isGlobalReg()) {
3665 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3666 // 'xrval'.
3667 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3668 } else {
3669 // Perform compare-and-swap procedure.
3670 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3671 }
3672 }
3673 return Res;
3674 }
3675
3676 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3677 const Expr *X, const Expr *E,
3678 const Expr *UE, bool IsXLHSInRHSPart,
3679 SourceLocation Loc) {
3680 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3681 "Update expr in 'atomic update' must be a binary operator.");
3682 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3683 // Update expressions are allowed to have the following forms:
3684 // x binop= expr; -> xrval binop expr;
3685 // x++, ++x -> xrval + 1;
3686 // x--, --x -> xrval - 1;
3687 // x = x binop expr; -> xrval binop expr
3688 // x = expr Op x; -> expr binop xrval;
3689 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3690 LValue XLValue = CGF.EmitLValue(X);
3691 RValue ExprRValue = CGF.EmitAnyExpr(E);
3692 auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3693 : llvm::AtomicOrdering::Monotonic;
3694 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3695 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3696 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3697 auto *ERValExpr = IsXLHSInRHSPart ?
RHS : LHS;
3698 auto Gen =
3699 [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3700 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3701 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3702 return CGF.EmitAnyExpr(UE);
3703 };
3704 (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3705 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3706 // OpenMP, 2.12.6, atomic Construct
3707 // Any atomic construct with a seq_cst clause forces the atomically
3708 // performed operation to include an implicit flush operation without a
3709 // list.
3710 if (IsSeqCst)
3711 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3712 }
3713
3714 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3715 QualType SourceType, QualType ResType,
3716 SourceLocation Loc) {
3717 switch (CGF.getEvaluationKind(ResType)) {
3718 case TEK_Scalar:
3719 return RValue::get(
3720 convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3721 case TEK_Complex: {
3722 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3723 return RValue::getComplex(Res.first, Res.second);
3724 }
3725 case TEK_Aggregate:
3726 break;
3727 }
3728 llvm_unreachable("Must be a scalar or complex.");
3729 }
3730
3731 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
3732 bool IsPostfixUpdate, const Expr *V,
3733 const Expr *X, const Expr *E,
3734 const Expr *UE, bool IsXLHSInRHSPart,
3735 SourceLocation Loc) {
3736 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
3737 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
3738 RValue NewVVal;
3739 LValue VLValue = CGF.EmitLValue(V);
3740 LValue XLValue = CGF.EmitLValue(X);
3741 RValue ExprRValue = CGF.EmitAnyExpr(E);
3742 auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3743 : llvm::AtomicOrdering::Monotonic;
3744 QualType NewVValType;
3745 if (UE) {
3746 // 'x' is updated with some additional value.
3747 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3748 "Update expr in 'atomic capture' must be a binary operator.");
3749 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3750 // Update expressions are allowed to have the following forms:
3751 // x binop= expr; -> xrval binop expr;
3752 // x++, ++x -> xrval + 1;
3753 // x--, --x -> xrval - 1;
3754 // x = x binop expr; -> xrval binop expr
3755 // x = expr Op x; -> expr binop xrval;
3756 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3757 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3758 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3759 NewVValType = XRValExpr->getType();
3760 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3761 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
3762 IsPostfixUpdate](RValue XRValue) -> RValue {
3763 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3764 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3765 RValue Res = CGF.EmitAnyExpr(UE);
3766 NewVVal = IsPostfixUpdate ? XRValue : Res;
3767 return Res;
3768 };
3769 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3770 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3771 if (Res.first) {
3772 // 'atomicrmw' instruction was generated.
3773 if (IsPostfixUpdate) {
3774 // Use old value from 'atomicrmw'.
3775 NewVVal = Res.second;
3776 } else {
3777 // 'atomicrmw' does not provide the new value, so evaluate it using the
3778 // old value of 'x'.
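// I.e. for a non-postfix capture like 'v = x binop= expr', 'atomicrmw'
// returns the old 'x', so the new value stored to 'v' is recomputed here by
// re-evaluating the update expression with 'x' bound to that old value.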
3779 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3780 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 3781 NewVVal = CGF.EmitAnyExpr(UE); 3782 } 3783 } 3784 } else { 3785 // 'x' is simply rewritten with some 'expr'. 3786 NewVValType = X->getType().getNonReferenceType(); 3787 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 3788 X->getType().getNonReferenceType(), Loc); 3789 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue { 3790 NewVVal = XRValue; 3791 return ExprRValue; 3792 }; 3793 // Try to perform atomicrmw xchg, otherwise simple exchange. 3794 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3795 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 3796 Loc, Gen); 3797 if (Res.first) { 3798 // 'atomicrmw' instruction was generated. 3799 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 3800 } 3801 } 3802 // Emit post-update store to 'v' of old/new 'x' value. 3803 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); 3804 // OpenMP, 2.12.6, atomic Construct 3805 // Any atomic construct with a seq_cst clause forces the atomically 3806 // performed operation to include an implicit flush operation without a 3807 // list. 3808 if (IsSeqCst) 3809 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3810 } 3811 3812 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 3813 bool IsSeqCst, bool IsPostfixUpdate, 3814 const Expr *X, const Expr *V, const Expr *E, 3815 const Expr *UE, bool IsXLHSInRHSPart, 3816 SourceLocation Loc) { 3817 switch (Kind) { 3818 case OMPC_read: 3819 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 3820 break; 3821 case OMPC_write: 3822 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 3823 break; 3824 case OMPC_unknown: 3825 case OMPC_update: 3826 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 3827 break; 3828 case OMPC_capture: 3829 EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 3830 IsXLHSInRHSPart, Loc); 3831 break; 3832 case OMPC_if: 3833 case OMPC_final: 3834 case OMPC_num_threads: 3835 case OMPC_private: 3836 case OMPC_firstprivate: 3837 case OMPC_lastprivate: 3838 case OMPC_reduction: 3839 case OMPC_task_reduction: 3840 case OMPC_in_reduction: 3841 case OMPC_safelen: 3842 case OMPC_simdlen: 3843 case OMPC_collapse: 3844 case OMPC_default: 3845 case OMPC_seq_cst: 3846 case OMPC_shared: 3847 case OMPC_linear: 3848 case OMPC_aligned: 3849 case OMPC_copyin: 3850 case OMPC_copyprivate: 3851 case OMPC_flush: 3852 case OMPC_proc_bind: 3853 case OMPC_schedule: 3854 case OMPC_ordered: 3855 case OMPC_nowait: 3856 case OMPC_untied: 3857 case OMPC_threadprivate: 3858 case OMPC_depend: 3859 case OMPC_mergeable: 3860 case OMPC_device: 3861 case OMPC_threads: 3862 case OMPC_simd: 3863 case OMPC_map: 3864 case OMPC_num_teams: 3865 case OMPC_thread_limit: 3866 case OMPC_priority: 3867 case OMPC_grainsize: 3868 case OMPC_nogroup: 3869 case OMPC_num_tasks: 3870 case OMPC_hint: 3871 case OMPC_dist_schedule: 3872 case OMPC_defaultmap: 3873 case OMPC_uniform: 3874 case OMPC_to: 3875 case OMPC_from: 3876 case OMPC_use_device_ptr: 3877 case OMPC_is_device_ptr: 3878 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 3879 } 3880 } 3881 3882 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 3883 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); 3884 OpenMPClauseKind Kind = OMPC_unknown; 3885 for (auto *C : S.clauses()) { 3886 // Find first clause (skip seq_cst clause, if it is 
first). 3887 if (C->getClauseKind() != OMPC_seq_cst) { 3888 Kind = C->getClauseKind(); 3889 break; 3890 } 3891 } 3892 3893 const auto *CS = S.getInnermostCapturedStmt()->IgnoreContainers(); 3894 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) { 3895 enterFullExpression(EWC); 3896 } 3897 // Processing for statements under 'atomic capture'. 3898 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 3899 for (const auto *C : Compound->body()) { 3900 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) { 3901 enterFullExpression(EWC); 3902 } 3903 } 3904 } 3905 3906 auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, 3907 PrePostActionTy &) { 3908 CGF.EmitStopPoint(CS); 3909 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), 3910 S.getV(), S.getExpr(), S.getUpdateExpr(), 3911 S.isXLHSInRHSPart(), S.getLocStart()); 3912 }; 3913 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3914 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); 3915 } 3916 3917 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, 3918 const OMPExecutableDirective &S, 3919 const RegionCodeGenTy &CodeGen) { 3920 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); 3921 CodeGenModule &CGM = CGF.CGM; 3922 3923 // On device emit this construct as inlined code. 3924 if (CGM.getLangOpts().OpenMPIsDevice) { 3925 OMPLexicalScope Scope(CGF, S, OMPD_target); 3926 CGM.getOpenMPRuntime().emitInlinedDirective( 3927 CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3928 CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); 3929 }); 3930 return; 3931 } 3932 3933 llvm::Function *Fn = nullptr; 3934 llvm::Constant *FnID = nullptr; 3935 3936 const Expr *IfCond = nullptr; 3937 // Check for the at most one if clause associated with the target region. 3938 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3939 if (C->getNameModifier() == OMPD_unknown || 3940 C->getNameModifier() == OMPD_target) { 3941 IfCond = C->getCondition(); 3942 break; 3943 } 3944 } 3945 3946 // Check if we have any device clause associated with the directive. 3947 const Expr *Device = nullptr; 3948 if (auto *C = S.getSingleClause<OMPDeviceClause>()) { 3949 Device = C->getDevice(); 3950 } 3951 3952 // Check if we have an if clause whose conditional always evaluates to false 3953 // or if we do not have any targets specified. If so the target region is not 3954 // an offload entry point. 3955 bool IsOffloadEntry = true; 3956 if (IfCond) { 3957 bool Val; 3958 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) 3959 IsOffloadEntry = false; 3960 } 3961 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3962 IsOffloadEntry = false; 3963 3964 assert(CGF.CurFuncDecl && "No parent declaration for target region!"); 3965 StringRef ParentName; 3966 // In case we have Ctors/Dtors we use the complete type variant to produce 3967 // the mangling of the device outlined kernel. 3968 if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) 3969 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); 3970 else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) 3971 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); 3972 else 3973 ParentName = 3974 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl))); 3975 3976 // Emit target region as a standalone region. 
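// Roughly: outline the region into a device function, register it as an
// offload entry when IsOffloadEntry holds, then emit the host-side target
// call, which falls back to running the outlined code on the host if the
// device launch fails or the 'if' clause evaluates to false.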
3977 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, 3978 IsOffloadEntry, CodeGen); 3979 OMPLexicalScope Scope(CGF, S, OMPD_task); 3980 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device); 3981 } 3982 3983 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, 3984 PrePostActionTy &Action) { 3985 Action.Enter(CGF); 3986 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 3987 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3988 CGF.EmitOMPPrivateClause(S, PrivateScope); 3989 (void)PrivateScope.Privatize(); 3990 3991 CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); 3992 } 3993 3994 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, 3995 StringRef ParentName, 3996 const OMPTargetDirective &S) { 3997 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3998 emitTargetRegion(CGF, S, Action); 3999 }; 4000 llvm::Function *Fn; 4001 llvm::Constant *Addr; 4002 // Emit target region as a standalone region. 4003 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4004 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4005 assert(Fn && Addr && "Target device function emission failed."); 4006 } 4007 4008 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 4009 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4010 emitTargetRegion(CGF, S, Action); 4011 }; 4012 emitCommonOMPTargetDirective(*this, S, CodeGen); 4013 } 4014 4015 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, 4016 const OMPExecutableDirective &S, 4017 OpenMPDirectiveKind InnermostKind, 4018 const RegionCodeGenTy &CodeGen) { 4019 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); 4020 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( 4021 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 4022 4023 const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>(); 4024 const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>(); 4025 if (NT || TL) { 4026 Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; 4027 Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; 4028 4029 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, 4030 S.getLocStart()); 4031 } 4032 4033 OMPTeamsScope Scope(CGF, S); 4034 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 4035 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 4036 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, 4037 CapturedVars); 4038 } 4039 4040 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { 4041 // Emit teams region as a standalone region. 
4042 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4043 Action.Enter(CGF); 4044 OMPPrivateScope PrivateScope(CGF); 4045 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4046 CGF.EmitOMPPrivateClause(S, PrivateScope); 4047 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4048 (void)PrivateScope.Privatize(); 4049 CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt()); 4050 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4051 }; 4052 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); 4053 emitPostUpdateForReductionClause( 4054 *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 4055 } 4056 4057 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, 4058 const OMPTargetTeamsDirective &S) { 4059 auto *CS = S.getCapturedStmt(OMPD_teams); 4060 Action.Enter(CGF); 4061 // Emit teams region as a standalone region. 4062 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { 4063 Action.Enter(CGF); 4064 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4065 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4066 CGF.EmitOMPPrivateClause(S, PrivateScope); 4067 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4068 (void)PrivateScope.Privatize(); 4069 CGF.EmitStmt(CS->getCapturedStmt()); 4070 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4071 }; 4072 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); 4073 emitPostUpdateForReductionClause( 4074 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 4075 } 4076 4077 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 4078 CodeGenModule &CGM, StringRef ParentName, 4079 const OMPTargetTeamsDirective &S) { 4080 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4081 emitTargetTeamsRegion(CGF, Action, S); 4082 }; 4083 llvm::Function *Fn; 4084 llvm::Constant *Addr; 4085 // Emit target region as a standalone region. 4086 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4087 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4088 assert(Fn && Addr && "Target device function emission failed."); 4089 } 4090 4091 void CodeGenFunction::EmitOMPTargetTeamsDirective( 4092 const OMPTargetTeamsDirective &S) { 4093 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4094 emitTargetTeamsRegion(CGF, Action, S); 4095 }; 4096 emitCommonOMPTargetDirective(*this, S, CodeGen); 4097 } 4098 4099 static void 4100 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action, 4101 const OMPTargetTeamsDistributeDirective &S) { 4102 Action.Enter(CGF); 4103 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4104 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 4105 }; 4106 4107 // Emit teams region as a standalone region. 
4108 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4109 PrePostActionTy &Action) { 4110 Action.Enter(CGF); 4111 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4112 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4113 (void)PrivateScope.Privatize(); 4114 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 4115 CodeGenDistribute); 4116 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4117 }; 4118 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen); 4119 emitPostUpdateForReductionClause(CGF, S, 4120 [](CodeGenFunction &) { return nullptr; }); 4121 } 4122 4123 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 4124 CodeGenModule &CGM, StringRef ParentName, 4125 const OMPTargetTeamsDistributeDirective &S) { 4126 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4127 emitTargetTeamsDistributeRegion(CGF, Action, S); 4128 }; 4129 llvm::Function *Fn; 4130 llvm::Constant *Addr; 4131 // Emit target region as a standalone region. 4132 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4133 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4134 assert(Fn && Addr && "Target device function emission failed."); 4135 } 4136 4137 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( 4138 const OMPTargetTeamsDistributeDirective &S) { 4139 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4140 emitTargetTeamsDistributeRegion(CGF, Action, S); 4141 }; 4142 emitCommonOMPTargetDirective(*this, S, CodeGen); 4143 } 4144 4145 static void emitTargetTeamsDistributeSimdRegion( 4146 CodeGenFunction &CGF, PrePostActionTy &Action, 4147 const OMPTargetTeamsDistributeSimdDirective &S) { 4148 Action.Enter(CGF); 4149 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4150 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 4151 }; 4152 4153 // Emit teams region as a standalone region. 4154 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4155 PrePostActionTy &Action) { 4156 Action.Enter(CGF); 4157 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4158 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4159 (void)PrivateScope.Privatize(); 4160 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 4161 CodeGenDistribute); 4162 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4163 }; 4164 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen); 4165 emitPostUpdateForReductionClause(CGF, S, 4166 [](CodeGenFunction &) { return nullptr; }); 4167 } 4168 4169 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 4170 CodeGenModule &CGM, StringRef ParentName, 4171 const OMPTargetTeamsDistributeSimdDirective &S) { 4172 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4173 emitTargetTeamsDistributeSimdRegion(CGF, Action, S); 4174 }; 4175 llvm::Function *Fn; 4176 llvm::Constant *Addr; 4177 // Emit target region as a standalone region. 
4178 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4179 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4180 assert(Fn && Addr && "Target device function emission failed."); 4181 } 4182 4183 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( 4184 const OMPTargetTeamsDistributeSimdDirective &S) { 4185 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4186 emitTargetTeamsDistributeSimdRegion(CGF, Action, S); 4187 }; 4188 emitCommonOMPTargetDirective(*this, S, CodeGen); 4189 } 4190 4191 void CodeGenFunction::EmitOMPTeamsDistributeDirective( 4192 const OMPTeamsDistributeDirective &S) { 4193 4194 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4195 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 4196 }; 4197 4198 // Emit teams region as a standalone region. 4199 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4200 PrePostActionTy &Action) { 4201 Action.Enter(CGF); 4202 OMPPrivateScope PrivateScope(CGF); 4203 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4204 (void)PrivateScope.Privatize(); 4205 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 4206 CodeGenDistribute); 4207 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4208 }; 4209 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); 4210 emitPostUpdateForReductionClause(*this, S, 4211 [](CodeGenFunction &) { return nullptr; }); 4212 } 4213 4214 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( 4215 const OMPTeamsDistributeSimdDirective &S) { 4216 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4217 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 4218 }; 4219 4220 // Emit teams region as a standalone region. 4221 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4222 PrePostActionTy &Action) { 4223 Action.Enter(CGF); 4224 OMPPrivateScope PrivateScope(CGF); 4225 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4226 (void)PrivateScope.Privatize(); 4227 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, 4228 CodeGenDistribute); 4229 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4230 }; 4231 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen); 4232 emitPostUpdateForReductionClause(*this, S, 4233 [](CodeGenFunction &) { return nullptr; }); 4234 } 4235 4236 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( 4237 const OMPTeamsDistributeParallelForDirective &S) { 4238 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4239 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 4240 S.getDistInc()); 4241 }; 4242 4243 // Emit teams region as a standalone region. 
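// For the combined construct the distribute loop is emitted inline and
// emitInnerParallelForWhenCombined generates the inner 'parallel for' over
// each distribute chunk, with the loop bounds shared between the two
// schedules (see isOpenMPLoopBoundSharingDirective).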
void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
                              CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  // Emit SPMD target teams distribute parallel for region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
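
// The paired entry points around this comment split the offloading work:
// the *DeviceFunction variant runs while device code is being built and
// registers the outlined function as an offload entry, while the
// *Directive variant below is the host-side path. The host path funnels
// through emitCommonOMPTargetDirective, which (sketch, assuming the usual
// libomptarget interface) inspects the result of the __tgt_target_* launch
// and falls back to executing the same region on the host on failure.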
void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  // Emit SPMD target teams distribute parallel for simd region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
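
// Cancellation sketch (hedged; the actual calls are emitted by
// CGOpenMPRuntime). For '#pragma omp cancellation point for' the emitted
// code is roughly:
//
//   if (__kmpc_cancellationpoint(&loc, gtid, cncl_kind) != 0)
//     br cancel.exit;  // the block from getOMPCancelDestination(OMPD_for)
//
// '#pragma omp cancel' additionally evaluates its 'if' clause (collected
// below) before calling __kmpc_cancel.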
void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}
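
// use_device_ptr example (illustrative user code, not emitted here):
//
//   int *p = ...;
//   #pragma omp target data map(to: p[0:N]) use_device_ptr(p)
//   some_device_api(p);  // 'p' must hold the device address in the region
//
// EmitOMPUseDevicePtrClause realizes this by privatizing 'p' and
// initializing the private copy from the address the runtime recorded in
// CaptureDeviceAddrMap.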
void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (auto PvtVarIt : C.private_copies()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get an
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
      // Initialize the temporary initialization variable with the address
      // we get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // that the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it will be initialized by the
      // declaration we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable has served its purpose in the emission
      // of the previous declaration, so it is no longer needed.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}
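
// Shape of the code emitted for '#pragma omp target data' (hedged sketch;
// the actual sequence is produced by emitTargetDataCalls, and the entry
// point names are assumptions):
//
//   __tgt_target_data_begin(device, n, base_ptrs, ptrs, sizes, map_types);
//   { /* region body; use_device_ptr variables privatized as above */ }
//   __tgt_target_data_end(device, n, base_ptrs, ptrs, sizes, map_types);
//
// If no offloading targets are configured, only the body is emitted.
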
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code
  // generation to deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers
      // if needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else
        RCG(CGF);
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive,
    // we don't use an inline scope: changes to the references inside the
    // region are expected to be visible outside, so we do not privatize
    // them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
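
// 'target parallel' sketch (hedged; names illustrative). The device entry
// emitted below is roughly:
//
//   void .omp_offload.entry(<captures>...) {
//     __kmpc_fork_call(&loc, <n>, @.omp_outlined.parallel, <captures>...);
//   }
//
// with private/firstprivate/reduction clauses privatized inside the
// parallel outlined function before the captured statement runs.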
static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  auto *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(
      CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
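
// 'target parallel for' combines an implicit 'parallel' with an inner
// 'for' worksharing loop. Hedged sketch of the statically scheduled loop
// (34 is kmp_sch_static; names illustrative):
//
//   __kmpc_for_static_init_4(&loc, gtid, 34, &last, &lb, &ub, &st, 1, 1);
//   for (i = lb; i <= ub; ++i) BODY;
//   __kmpc_for_static_fini(&loc, gtid);
//
// Non-static schedules go through emitDispatchForLoopBounds instead.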
static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

/// Map the original declaration of a loop helper variable (lower/upper
/// bound, stride, last-iteration flag) to the address of the corresponding
/// implicit parameter of the outlined function.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(
      VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
}
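
// Taskloop lowering sketch (hedged; the actual call is built by
// emitTaskLoopCall). For '#pragma omp taskloop grainsize(G)' over [0, N)
// the generated code is roughly:
//
//   t = __kmpc_omp_task_alloc(&loc, gtid, flags, sz, shareds_sz, @entry);
//   __kmpc_taskloop(&loc, gtid, t, if_val, &lb, &ub, st,
//                   /*nogroup=*/0, /*sched=grainsize*/1, G, /*dup=*/0);
//
// The runtime then splits the iteration space into tasks of roughly G
// iterations each.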
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iteration count
    // on each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getLocStart());
  }
}

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}
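
// 'target update' example (illustrative):
//
//   #pragma omp target update to(a[0:N]) device(D) if(Cond)
//
// lowers to a standalone data call, roughly (assuming the usual
// libomptarget entry point):
//
//   if (Cond)
//     __tgt_target_data_update(D, n, base_ptrs, ptrs, sizes, map_types);
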
// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const auto *E : LD->counters()) {
          if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                  cast<DeclRefExpr>(E)->getDecl())) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
      }
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  OMPSimdLexicalScope Scope(*this, D);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this,
      isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                  : D.getDirectiveKind(),
      CodeGen);
}
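
// EmitSimpleOMPExecutableDirective is the catch-all used when only the
// de-sugared body of a directive has to be emitted. A hedged sketch of a
// hypothetical caller ('needsFullCodegen' is not a real helper):
//
//   if (!needsFullCodegen(D))
//     CGF.EmitSimpleOMPExecutableDirective(D);
//
// Simd directives still route through emitOMPSimdRegion so the loop keeps
// its simd semantics.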