1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit OpenMP nodes as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCleanup.h" 15 #include "CGOpenMPRuntime.h" 16 #include "CodeGenFunction.h" 17 #include "CodeGenModule.h" 18 #include "TargetInfo.h" 19 #include "clang/AST/Stmt.h" 20 #include "clang/AST/StmtOpenMP.h" 21 #include "clang/AST/DeclOpenMP.h" 22 #include "llvm/IR/CallSite.h" 23 using namespace clang; 24 using namespace CodeGen; 25 26 namespace { 27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen 28 /// for captured expressions. 29 class OMPLexicalScope : public CodeGenFunction::LexicalScope { 30 void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 31 for (const auto *C : S.clauses()) { 32 if (auto *CPI = OMPClauseWithPreInit::get(C)) { 33 if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { 34 for (const auto *I : PreInit->decls()) { 35 if (!I->hasAttr<OMPCaptureNoInitAttr>()) 36 CGF.EmitVarDecl(cast<VarDecl>(*I)); 37 else { 38 CodeGenFunction::AutoVarEmission Emission = 39 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 40 CGF.EmitAutoVarCleanups(Emission); 41 } 42 } 43 } 44 } 45 } 46 } 47 CodeGenFunction::OMPPrivateScope InlinedShareds; 48 49 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { 50 return CGF.LambdaCaptureFields.lookup(VD) || 51 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || 52 (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl)); 53 } 54 55 public: 56 OMPLexicalScope( 57 CodeGenFunction &CGF, const OMPExecutableDirective &S, 58 const 
llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None, 59 const bool EmitPreInitStmt = true) 60 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), 61 InlinedShareds(CGF) { 62 if (EmitPreInitStmt) 63 emitPreInitStmt(CGF, S); 64 if (!CapturedRegion.hasValue()) 65 return; 66 assert(S.hasAssociatedStmt() && 67 "Expected associated statement for inlined directive."); 68 const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion); 69 for (auto &C : CS->captures()) { 70 if (C.capturesVariable() || C.capturesVariableByCopy()) { 71 auto *VD = C.getCapturedVar(); 72 assert(VD == VD->getCanonicalDecl() && 73 "Canonical decl must be captured."); 74 DeclRefExpr DRE( 75 const_cast<VarDecl *>(VD), 76 isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo && 77 InlinedShareds.isGlobalVarCaptured(VD)), 78 VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); 79 InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { 80 return CGF.EmitLValue(&DRE).getAddress(); 81 }); 82 } 83 } 84 (void)InlinedShareds.Privatize(); 85 } 86 }; 87 88 /// Lexical scope for OpenMP parallel construct, that handles correct codegen 89 /// for captured expressions. 90 class OMPParallelScope final : public OMPLexicalScope { 91 bool EmitPreInitStmt(const OMPExecutableDirective &S) { 92 OpenMPDirectiveKind Kind = S.getDirectiveKind(); 93 return !(isOpenMPTargetExecutionDirective(Kind) || 94 isOpenMPLoopBoundSharingDirective(Kind)) && 95 isOpenMPParallelDirective(Kind); 96 } 97 98 public: 99 OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) 100 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None, 101 EmitPreInitStmt(S)) {} 102 }; 103 104 /// Lexical scope for OpenMP teams construct, that handles correct codegen 105 /// for captured expressions. 
106 class OMPTeamsScope final : public OMPLexicalScope { 107 bool EmitPreInitStmt(const OMPExecutableDirective &S) { 108 OpenMPDirectiveKind Kind = S.getDirectiveKind(); 109 return !isOpenMPTargetExecutionDirective(Kind) && 110 isOpenMPTeamsDirective(Kind); 111 } 112 113 public: 114 OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) 115 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None, 116 EmitPreInitStmt(S)) {} 117 }; 118 119 /// Private scope for OpenMP loop-based directives, that supports capturing 120 /// of used expression from loop statement. 121 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { 122 void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { 123 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 124 for (auto *E : S.counters()) { 125 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 126 (void)PreCondScope.addPrivate(VD, [&CGF, VD]() { 127 return CGF.CreateMemTemp(VD->getType().getNonReferenceType()); 128 }); 129 } 130 (void)PreCondScope.Privatize(); 131 if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) { 132 if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) { 133 for (const auto *I : PreInits->decls()) 134 CGF.EmitVarDecl(cast<VarDecl>(*I)); 135 } 136 } 137 } 138 139 public: 140 OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S) 141 : CodeGenFunction::RunCleanupsScope(CGF) { 142 emitPreInitStmt(CGF, S); 143 } 144 }; 145 146 class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope { 147 CodeGenFunction::OMPPrivateScope InlinedShareds; 148 149 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { 150 return CGF.LambdaCaptureFields.lookup(VD) || 151 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || 152 (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) && 153 cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD)); 154 } 155 156 public: 157 OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) 158 : 
CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), 159 InlinedShareds(CGF) { 160 for (const auto *C : S.clauses()) { 161 if (auto *CPI = OMPClauseWithPreInit::get(C)) { 162 if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { 163 for (const auto *I : PreInit->decls()) { 164 if (!I->hasAttr<OMPCaptureNoInitAttr>()) 165 CGF.EmitVarDecl(cast<VarDecl>(*I)); 166 else { 167 CodeGenFunction::AutoVarEmission Emission = 168 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 169 CGF.EmitAutoVarCleanups(Emission); 170 } 171 } 172 } 173 } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) { 174 for (const Expr *E : UDP->varlists()) { 175 const Decl *D = cast<DeclRefExpr>(E)->getDecl(); 176 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) 177 CGF.EmitVarDecl(*OED); 178 } 179 } 180 } 181 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 182 CGF.EmitOMPPrivateClause(S, InlinedShareds); 183 if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) { 184 if (const Expr *E = TG->getReductionRef()) 185 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl())); 186 } 187 const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt()); 188 while (CS) { 189 for (auto &C : CS->captures()) { 190 if (C.capturesVariable() || C.capturesVariableByCopy()) { 191 auto *VD = C.getCapturedVar(); 192 assert(VD == VD->getCanonicalDecl() && 193 "Canonical decl must be captured."); 194 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 195 isCapturedVar(CGF, VD) || 196 (CGF.CapturedStmtInfo && 197 InlinedShareds.isGlobalVarCaptured(VD)), 198 VD->getType().getNonReferenceType(), VK_LValue, 199 C.getLocation()); 200 InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { 201 return CGF.EmitLValue(&DRE).getAddress(); 202 }); 203 } 204 } 205 CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt()); 206 } 207 (void)InlinedShareds.Privatize(); 208 } 209 }; 210 211 } // namespace 212 213 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, 214 const 
OMPExecutableDirective &S, 215 const RegionCodeGenTy &CodeGen); 216 217 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) { 218 if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) { 219 if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) { 220 OrigVD = OrigVD->getCanonicalDecl(); 221 bool IsCaptured = 222 LambdaCaptureFields.lookup(OrigVD) || 223 (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) || 224 (CurCodeDecl && isa<BlockDecl>(CurCodeDecl)); 225 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured, 226 OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc()); 227 return EmitLValue(&DRE); 228 } 229 } 230 return EmitLValue(E); 231 } 232 233 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { 234 auto &C = getContext(); 235 llvm::Value *Size = nullptr; 236 auto SizeInChars = C.getTypeSizeInChars(Ty); 237 if (SizeInChars.isZero()) { 238 // getTypeSizeInChars() returns 0 for a VLA. 239 while (auto *VAT = C.getAsVariableArrayType(Ty)) { 240 llvm::Value *ArraySize; 241 std::tie(ArraySize, Ty) = getVLASize(VAT); 242 Size = Size ? 
Builder.CreateNUWMul(Size, ArraySize) : ArraySize; 243 } 244 SizeInChars = C.getTypeSizeInChars(Ty); 245 if (SizeInChars.isZero()) 246 return llvm::ConstantInt::get(SizeTy, /*V=*/0); 247 Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); 248 } else 249 Size = CGM.getSize(SizeInChars); 250 return Size; 251 } 252 253 void CodeGenFunction::GenerateOpenMPCapturedVars( 254 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { 255 const RecordDecl *RD = S.getCapturedRecordDecl(); 256 auto CurField = RD->field_begin(); 257 auto CurCap = S.captures().begin(); 258 for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), 259 E = S.capture_init_end(); 260 I != E; ++I, ++CurField, ++CurCap) { 261 if (CurField->hasCapturedVLAType()) { 262 auto VAT = CurField->getCapturedVLAType(); 263 auto *Val = VLASizeMap[VAT->getSizeExpr()]; 264 CapturedVars.push_back(Val); 265 } else if (CurCap->capturesThis()) 266 CapturedVars.push_back(CXXThisValue); 267 else if (CurCap->capturesVariableByCopy()) { 268 llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation()); 269 270 // If the field is not a pointer, we need to save the actual value 271 // and load it as a void pointer. 272 if (!CurField->getType()->isAnyPointerType()) { 273 auto &Ctx = getContext(); 274 auto DstAddr = CreateMemTemp( 275 Ctx.getUIntPtrType(), 276 Twine(CurCap->getCapturedVar()->getName()) + ".casted"); 277 LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); 278 279 auto *SrcAddrVal = EmitScalarConversion( 280 DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), 281 Ctx.getPointerType(CurField->getType()), CurCap->getLocation()); 282 LValue SrcLV = 283 MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType()); 284 285 // Store the value using the source type pointer. 286 EmitStoreThroughLValue(RValue::get(CV), SrcLV); 287 288 // Load the value using the destination type pointer. 
289 CV = EmitLoadOfScalar(DstLV, CurCap->getLocation()); 290 } 291 CapturedVars.push_back(CV); 292 } else { 293 assert(CurCap->capturesVariable() && "Expected capture by reference."); 294 CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer()); 295 } 296 } 297 } 298 299 static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc, 300 QualType DstType, StringRef Name, 301 LValue AddrLV, 302 bool isReferenceType = false) { 303 ASTContext &Ctx = CGF.getContext(); 304 305 auto *CastedPtr = CGF.EmitScalarConversion(AddrLV.getAddress().getPointer(), 306 Ctx.getUIntPtrType(), 307 Ctx.getPointerType(DstType), Loc); 308 auto TmpAddr = 309 CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) 310 .getAddress(); 311 312 // If we are dealing with references we need to return the address of the 313 // reference instead of the reference of the value. 314 if (isReferenceType) { 315 QualType RefType = Ctx.getLValueReferenceType(DstType); 316 auto *RefVal = TmpAddr.getPointer(); 317 TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref"); 318 auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType); 319 CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true); 320 } 321 322 return TmpAddr; 323 } 324 325 static QualType getCanonicalParamType(ASTContext &C, QualType T) { 326 if (T->isLValueReferenceType()) { 327 return C.getLValueReferenceType( 328 getCanonicalParamType(C, T.getNonReferenceType()), 329 /*SpelledAsLValue=*/false); 330 } 331 if (T->isPointerType()) 332 return C.getPointerType(getCanonicalParamType(C, T->getPointeeType())); 333 if (auto *A = T->getAsArrayTypeUnsafe()) { 334 if (auto *VLA = dyn_cast<VariableArrayType>(A)) 335 return getCanonicalParamType(C, VLA->getElementType()); 336 else if (!A->isVariablyModifiedType()) 337 return C.getCanonicalType(T); 338 } 339 return C.getCanonicalParamType(T); 340 } 341 342 namespace { 343 /// Contains required data for proper outlined function codegen. 
344 struct FunctionOptions { 345 /// Captured statement for which the function is generated. 346 const CapturedStmt *S = nullptr; 347 /// true if cast to/from UIntPtr is required for variables captured by 348 /// value. 349 const bool UIntPtrCastRequired = true; 350 /// true if only casted arguments must be registered as local args or VLA 351 /// sizes. 352 const bool RegisterCastedArgsOnly = false; 353 /// Name of the generated function. 354 const StringRef FunctionName; 355 explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, 356 bool RegisterCastedArgsOnly, 357 StringRef FunctionName) 358 : S(S), UIntPtrCastRequired(UIntPtrCastRequired), 359 RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), 360 FunctionName(FunctionName) {} 361 }; 362 } 363 364 static llvm::Function *emitOutlinedFunctionPrologue( 365 CodeGenFunction &CGF, FunctionArgList &Args, 366 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> 367 &LocalAddrs, 368 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> 369 &VLASizes, 370 llvm::Value *&CXXThisValue, const FunctionOptions &FO) { 371 const CapturedDecl *CD = FO.S->getCapturedDecl(); 372 const RecordDecl *RD = FO.S->getCapturedRecordDecl(); 373 assert(CD->hasBody() && "missing CapturedDecl body"); 374 375 CXXThisValue = nullptr; 376 // Build the argument list. 
377 CodeGenModule &CGM = CGF.CGM; 378 ASTContext &Ctx = CGM.getContext(); 379 FunctionArgList TargetArgs; 380 Args.append(CD->param_begin(), 381 std::next(CD->param_begin(), CD->getContextParamPosition())); 382 TargetArgs.append( 383 CD->param_begin(), 384 std::next(CD->param_begin(), CD->getContextParamPosition())); 385 auto I = FO.S->captures().begin(); 386 FunctionDecl *DebugFunctionDecl = nullptr; 387 if (!FO.UIntPtrCastRequired) { 388 FunctionProtoType::ExtProtoInfo EPI; 389 DebugFunctionDecl = FunctionDecl::Create( 390 Ctx, Ctx.getTranslationUnitDecl(), FO.S->getLocStart(), 391 SourceLocation(), DeclarationName(), Ctx.VoidTy, 392 Ctx.getTrivialTypeSourceInfo( 393 Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)), 394 SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false); 395 } 396 for (auto *FD : RD->fields()) { 397 QualType ArgType = FD->getType(); 398 IdentifierInfo *II = nullptr; 399 VarDecl *CapVar = nullptr; 400 401 // If this is a capture by copy and the type is not a pointer, the outlined 402 // function argument type should be uintptr and the value properly casted to 403 // uintptr. This is necessary given that the runtime library is only able to 404 // deal with pointers. We can pass in the same way the VLA type sizes to the 405 // outlined function. 
406 if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || 407 I->capturesVariableArrayType()) { 408 if (FO.UIntPtrCastRequired) 409 ArgType = Ctx.getUIntPtrType(); 410 } 411 412 if (I->capturesVariable() || I->capturesVariableByCopy()) { 413 CapVar = I->getCapturedVar(); 414 II = CapVar->getIdentifier(); 415 } else if (I->capturesThis()) 416 II = &Ctx.Idents.get("this"); 417 else { 418 assert(I->capturesVariableArrayType()); 419 II = &Ctx.Idents.get("vla"); 420 } 421 if (ArgType->isVariablyModifiedType()) 422 ArgType = getCanonicalParamType(Ctx, ArgType); 423 VarDecl *Arg; 424 if (DebugFunctionDecl && (CapVar || I->capturesThis())) { 425 Arg = ParmVarDecl::Create( 426 Ctx, DebugFunctionDecl, 427 CapVar ? CapVar->getLocStart() : FD->getLocStart(), 428 CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType, 429 /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr); 430 } else { 431 Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), 432 II, ArgType, ImplicitParamDecl::Other); 433 } 434 Args.emplace_back(Arg); 435 // Do not cast arguments if we emit function with non-original types. 436 TargetArgs.emplace_back( 437 FO.UIntPtrCastRequired 438 ? Arg 439 : CGM.getOpenMPRuntime().translateParameter(FD, Arg)); 440 ++I; 441 } 442 Args.append( 443 std::next(CD->param_begin(), CD->getContextParamPosition() + 1), 444 CD->param_end()); 445 TargetArgs.append( 446 std::next(CD->param_begin(), CD->getContextParamPosition() + 1), 447 CD->param_end()); 448 449 // Create the function declaration. 
450 const CGFunctionInfo &FuncInfo = 451 CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs); 452 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); 453 454 llvm::Function *F = 455 llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage, 456 FO.FunctionName, &CGM.getModule()); 457 CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); 458 if (CD->isNothrow()) 459 F->setDoesNotThrow(); 460 461 // Generate the function. 462 CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, 463 FO.S->getLocStart(), CD->getBody()->getLocStart()); 464 unsigned Cnt = CD->getContextParamPosition(); 465 I = FO.S->captures().begin(); 466 for (auto *FD : RD->fields()) { 467 // Do not map arguments if we emit function with non-original types. 468 Address LocalAddr(Address::invalid()); 469 if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) { 470 LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt], 471 TargetArgs[Cnt]); 472 } else { 473 LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]); 474 } 475 // If we are capturing a pointer by copy we don't need to do anything, just 476 // use the value that we get from the arguments. 477 if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { 478 const VarDecl *CurVD = I->getCapturedVar(); 479 // If the variable is a reference we need to materialize it here. 
480 if (CurVD->getType()->isReferenceType()) { 481 Address RefAddr = CGF.CreateMemTemp( 482 CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref"); 483 CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, 484 /*Volatile=*/false, CurVD->getType()); 485 LocalAddr = RefAddr; 486 } 487 if (!FO.RegisterCastedArgsOnly) 488 LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}}); 489 ++Cnt; 490 ++I; 491 continue; 492 } 493 494 LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), 495 AlignmentSource::Decl); 496 if (FD->hasCapturedVLAType()) { 497 if (FO.UIntPtrCastRequired) { 498 ArgLVal = CGF.MakeAddrLValue( 499 castValueFromUintptr(CGF, I->getLocation(), FD->getType(), 500 Args[Cnt]->getName(), ArgLVal), 501 FD->getType(), AlignmentSource::Decl); 502 } 503 auto *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); 504 auto VAT = FD->getCapturedVLAType(); 505 VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}}); 506 } else if (I->capturesVariable()) { 507 auto *Var = I->getCapturedVar(); 508 QualType VarTy = Var->getType(); 509 Address ArgAddr = ArgLVal.getAddress(); 510 if (!VarTy->isReferenceType()) { 511 if (ArgLVal.getType()->isLValueReferenceType()) { 512 ArgAddr = CGF.EmitLoadOfReference(ArgLVal); 513 } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { 514 assert(ArgLVal.getType()->isPointerType()); 515 ArgAddr = CGF.EmitLoadOfPointer( 516 ArgAddr, ArgLVal.getType()->castAs<PointerType>()); 517 } 518 } 519 if (!FO.RegisterCastedArgsOnly) { 520 LocalAddrs.insert( 521 {Args[Cnt], 522 {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}}); 523 } 524 } else if (I->capturesVariableByCopy()) { 525 assert(!FD->getType()->isAnyPointerType() && 526 "Not expecting a captured pointer."); 527 auto *Var = I->getCapturedVar(); 528 QualType VarTy = Var->getType(); 529 LocalAddrs.insert( 530 {Args[Cnt], 531 {Var, FO.UIntPtrCastRequired 532 ? 
castValueFromUintptr(CGF, I->getLocation(), 533 FD->getType(), Args[Cnt]->getName(), 534 ArgLVal, VarTy->isReferenceType()) 535 : ArgLVal.getAddress()}}); 536 } else { 537 // If 'this' is captured, load it into CXXThisValue. 538 assert(I->capturesThis()); 539 CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); 540 LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}}); 541 } 542 ++Cnt; 543 ++I; 544 } 545 546 return F; 547 } 548 549 llvm::Function * 550 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { 551 assert( 552 CapturedStmtInfo && 553 "CapturedStmtInfo should be set when generating the captured function"); 554 const CapturedDecl *CD = S.getCapturedDecl(); 555 // Build the argument list. 556 bool NeedWrapperFunction = 557 getDebugInfo() && 558 CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo; 559 FunctionArgList Args; 560 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; 561 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes; 562 SmallString<256> Buffer; 563 llvm::raw_svector_ostream Out(Buffer); 564 Out << CapturedStmtInfo->getHelperName(); 565 if (NeedWrapperFunction) 566 Out << "_debug__"; 567 FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, 568 Out.str()); 569 llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs, 570 VLASizes, CXXThisValue, FO); 571 for (const auto &LocalAddrPair : LocalAddrs) { 572 if (LocalAddrPair.second.first) { 573 setAddrOfLocalVar(LocalAddrPair.second.first, 574 LocalAddrPair.second.second); 575 } 576 } 577 for (const auto &VLASizePair : VLASizes) 578 VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second; 579 PGO.assignRegionCounters(GlobalDecl(CD), F); 580 CapturedStmtInfo->EmitBody(*this, CD->getBody()); 581 FinishFunction(CD->getBodyRBrace()); 582 if (!NeedWrapperFunction) 583 return F; 584 585 FunctionOptions WrapperFO(&S, 
/*UIntPtrCastRequired=*/true, 586 /*RegisterCastedArgsOnly=*/true, 587 CapturedStmtInfo->getHelperName()); 588 CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); 589 Args.clear(); 590 LocalAddrs.clear(); 591 VLASizes.clear(); 592 llvm::Function *WrapperF = 593 emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, 594 WrapperCGF.CXXThisValue, WrapperFO); 595 llvm::SmallVector<llvm::Value *, 4> CallArgs; 596 for (const auto *Arg : Args) { 597 llvm::Value *CallArg; 598 auto I = LocalAddrs.find(Arg); 599 if (I != LocalAddrs.end()) { 600 LValue LV = WrapperCGF.MakeAddrLValue( 601 I->second.second, 602 I->second.first ? I->second.first->getType() : Arg->getType(), 603 AlignmentSource::Decl); 604 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart()); 605 } else { 606 auto EI = VLASizes.find(Arg); 607 if (EI != VLASizes.end()) 608 CallArg = EI->second.second; 609 else { 610 LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg), 611 Arg->getType(), 612 AlignmentSource::Decl); 613 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart()); 614 } 615 } 616 CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType())); 617 } 618 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(), 619 F, CallArgs); 620 WrapperCGF.FinishFunction(); 621 return WrapperF; 622 } 623 624 //===----------------------------------------------------------------------===// 625 // OpenMP Directive Emission 626 //===----------------------------------------------------------------------===// 627 void CodeGenFunction::EmitOMPAggregateAssign( 628 Address DestAddr, Address SrcAddr, QualType OriginalType, 629 const llvm::function_ref<void(Address, Address)> &CopyGen) { 630 // Perform element-by-element initialization. 631 QualType ElementTy; 632 633 // Drill down to the base element type on both arrays. 
634 auto ArrayTy = OriginalType->getAsArrayTypeUnsafe(); 635 auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr); 636 SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 637 638 auto SrcBegin = SrcAddr.getPointer(); 639 auto DestBegin = DestAddr.getPointer(); 640 // Cast from pointer to array type to pointer to single element. 641 auto DestEnd = Builder.CreateGEP(DestBegin, NumElements); 642 // The basic structure here is a while-do loop. 643 auto BodyBB = createBasicBlock("omp.arraycpy.body"); 644 auto DoneBB = createBasicBlock("omp.arraycpy.done"); 645 auto IsEmpty = 646 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); 647 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 648 649 // Enter the loop body, making that address the current address. 650 auto EntryBB = Builder.GetInsertBlock(); 651 EmitBlock(BodyBB); 652 653 CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy); 654 655 llvm::PHINode *SrcElementPHI = 656 Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 657 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 658 Address SrcElementCurrent = 659 Address(SrcElementPHI, 660 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 661 662 llvm::PHINode *DestElementPHI = 663 Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 664 DestElementPHI->addIncoming(DestBegin, EntryBB); 665 Address DestElementCurrent = 666 Address(DestElementPHI, 667 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 668 669 // Emit copy. 670 CopyGen(DestElementCurrent, SrcElementCurrent); 671 672 // Shift the address forward by one element. 673 auto DestElementNext = Builder.CreateConstGEP1_32( 674 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 675 auto SrcElementNext = Builder.CreateConstGEP1_32( 676 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 677 // Check whether we've reached the end. 
678 auto Done = 679 Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 680 Builder.CreateCondBr(Done, DoneBB, BodyBB); 681 DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock()); 682 SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock()); 683 684 // Done. 685 EmitBlock(DoneBB, /*IsFinished=*/true); 686 } 687 688 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, 689 Address SrcAddr, const VarDecl *DestVD, 690 const VarDecl *SrcVD, const Expr *Copy) { 691 if (OriginalType->isArrayType()) { 692 auto *BO = dyn_cast<BinaryOperator>(Copy); 693 if (BO && BO->getOpcode() == BO_Assign) { 694 // Perform simple memcpy for simple copying. 695 EmitAggregateAssign(DestAddr, SrcAddr, OriginalType); 696 } else { 697 // For arrays with complex element types perform element by element 698 // copying. 699 EmitOMPAggregateAssign( 700 DestAddr, SrcAddr, OriginalType, 701 [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) { 702 // Working with the single array element, so have to remap 703 // destination and source variables to corresponding array 704 // elements. 705 CodeGenFunction::OMPPrivateScope Remap(*this); 706 Remap.addPrivate(DestVD, [DestElement]() -> Address { 707 return DestElement; 708 }); 709 Remap.addPrivate( 710 SrcVD, [SrcElement]() -> Address { return SrcElement; }); 711 (void)Remap.Privatize(); 712 EmitIgnoredExpr(Copy); 713 }); 714 } 715 } else { 716 // Remap pseudo source variable to private copy. 717 CodeGenFunction::OMPPrivateScope Remap(*this); 718 Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; }); 719 Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; }); 720 (void)Remap.Privatize(); 721 // Emit copying of the whole variable. 
722 EmitIgnoredExpr(Copy); 723 } 724 } 725 726 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, 727 OMPPrivateScope &PrivateScope) { 728 if (!HaveInsertPoint()) 729 return false; 730 bool FirstprivateIsLastprivate = false; 731 llvm::DenseSet<const VarDecl *> Lastprivates; 732 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 733 for (const auto *D : C->varlists()) 734 Lastprivates.insert( 735 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); 736 } 737 llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; 738 llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 739 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 740 // Force emission of the firstprivate copy if the directive does not emit 741 // outlined function, like omp for, omp simd, omp distribute etc. 742 bool MustEmitFirstprivateCopy = 743 CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown; 744 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { 745 auto IRef = C->varlist_begin(); 746 auto InitsRef = C->inits().begin(); 747 for (auto IInit : C->private_copies()) { 748 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 749 bool ThisFirstprivateIsLastprivate = 750 Lastprivates.count(OrigVD->getCanonicalDecl()) > 0; 751 auto *FD = CapturedStmtInfo->lookup(OrigVD); 752 if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD && 753 !FD->getType()->isReferenceType()) { 754 EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); 755 ++IRef; 756 ++InitsRef; 757 continue; 758 } 759 FirstprivateIsLastprivate = 760 FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate; 761 if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { 762 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 763 auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); 764 bool IsRegistered; 765 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 766 
/*RefersToEnclosingVariableOrCapture=*/FD != nullptr, 767 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 768 Address OriginalAddr = EmitLValue(&DRE).getAddress(); 769 QualType Type = VD->getType(); 770 if (Type->isArrayType()) { 771 // Emit VarDecl with copy init for arrays. 772 // Get the address of the original variable captured in current 773 // captured region. 774 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 775 auto Emission = EmitAutoVarAlloca(*VD); 776 auto *Init = VD->getInit(); 777 if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) { 778 // Perform simple memcpy. 779 EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr, 780 Type); 781 } else { 782 EmitOMPAggregateAssign( 783 Emission.getAllocatedAddress(), OriginalAddr, Type, 784 [this, VDInit, Init](Address DestElement, 785 Address SrcElement) { 786 // Clean up any temporaries needed by the initialization. 787 RunCleanupsScope InitScope(*this); 788 // Emit initialization for single element. 789 setAddrOfLocalVar(VDInit, SrcElement); 790 EmitAnyExprToMem(Init, DestElement, 791 Init->getType().getQualifiers(), 792 /*IsInitializer*/ false); 793 LocalDeclMap.erase(VDInit); 794 }); 795 } 796 EmitAutoVarCleanups(Emission); 797 return Emission.getAllocatedAddress(); 798 }); 799 } else { 800 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 801 // Emit private VarDecl with copy init. 802 // Remap temp VDInit variable to the address of the original 803 // variable 804 // (for proper handling of captured global variables). 805 setAddrOfLocalVar(VDInit, OriginalAddr); 806 EmitDecl(*VD); 807 LocalDeclMap.erase(VDInit); 808 return GetAddrOfLocalVar(VD); 809 }); 810 } 811 assert(IsRegistered && 812 "firstprivate var already registered as private"); 813 // Silence the warning about unused variable. 
814 (void)IsRegistered; 815 } 816 ++IRef; 817 ++InitsRef; 818 } 819 } 820 return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty(); 821 } 822 823 void CodeGenFunction::EmitOMPPrivateClause( 824 const OMPExecutableDirective &D, 825 CodeGenFunction::OMPPrivateScope &PrivateScope) { 826 if (!HaveInsertPoint()) 827 return; 828 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 829 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) { 830 auto IRef = C->varlist_begin(); 831 for (auto IInit : C->private_copies()) { 832 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 833 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 834 auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 835 bool IsRegistered = 836 PrivateScope.addPrivate(OrigVD, [&]() -> Address { 837 // Emit private VarDecl with copy init. 838 EmitDecl(*VD); 839 return GetAddrOfLocalVar(VD); 840 }); 841 assert(IsRegistered && "private var already registered as private"); 842 // Silence the warning about unused variable. 843 (void)IsRegistered; 844 } 845 ++IRef; 846 } 847 } 848 } 849 850 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { 851 if (!HaveInsertPoint()) 852 return false; 853 // threadprivate_var1 = master_threadprivate_var1; 854 // operator=(threadprivate_var2, master_threadprivate_var2); 855 // ... 856 // __kmpc_barrier(&loc, global_tid); 857 llvm::DenseSet<const VarDecl *> CopiedVars; 858 llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; 859 for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) { 860 auto IRef = C->varlist_begin(); 861 auto ISrcRef = C->source_exprs().begin(); 862 auto IDestRef = C->destination_exprs().begin(); 863 for (auto *AssignOp : C->assignment_ops()) { 864 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 865 QualType Type = VD->getType(); 866 if (CopiedVars.insert(VD->getCanonicalDecl()).second) { 867 // Get the address of the master variable. 
If we are emitting code with 868 // TLS support, the address is passed from the master as field in the 869 // captured declaration. 870 Address MasterAddr = Address::invalid(); 871 if (getLangOpts().OpenMPUseTLS && 872 getContext().getTargetInfo().isTLSSupported()) { 873 assert(CapturedStmtInfo->lookup(VD) && 874 "Copyin threadprivates should have been captured!"); 875 DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(), 876 VK_LValue, (*IRef)->getExprLoc()); 877 MasterAddr = EmitLValue(&DRE).getAddress(); 878 LocalDeclMap.erase(VD); 879 } else { 880 MasterAddr = 881 Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) 882 : CGM.GetAddrOfGlobal(VD), 883 getContext().getDeclAlign(VD)); 884 } 885 // Get the address of the threadprivate variable. 886 Address PrivateAddr = EmitLValue(*IRef).getAddress(); 887 if (CopiedVars.size() == 1) { 888 // At first check if current thread is a master thread. If it is, no 889 // need to copy data. 890 CopyBegin = createBasicBlock("copyin.not.master"); 891 CopyEnd = createBasicBlock("copyin.not.master.end"); 892 Builder.CreateCondBr( 893 Builder.CreateICmpNE( 894 Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy), 895 Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)), 896 CopyBegin, CopyEnd); 897 EmitBlock(CopyBegin); 898 } 899 auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 900 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 901 EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp); 902 } 903 ++IRef; 904 ++ISrcRef; 905 ++IDestRef; 906 } 907 } 908 if (CopyEnd) { 909 // Exit out of copying procedure for non-master thread. 
910 EmitBlock(CopyEnd, /*IsFinished=*/true); 911 return true; 912 } 913 return false; 914 } 915 916 bool CodeGenFunction::EmitOMPLastprivateClauseInit( 917 const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { 918 if (!HaveInsertPoint()) 919 return false; 920 bool HasAtLeastOneLastprivate = false; 921 llvm::DenseSet<const VarDecl *> SIMDLCVs; 922 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 923 auto *LoopDirective = cast<OMPLoopDirective>(&D); 924 for (auto *C : LoopDirective->counters()) { 925 SIMDLCVs.insert( 926 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 927 } 928 } 929 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; 930 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 931 HasAtLeastOneLastprivate = true; 932 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 933 !getLangOpts().OpenMPSimd) 934 break; 935 auto IRef = C->varlist_begin(); 936 auto IDestRef = C->destination_exprs().begin(); 937 for (auto *IInit : C->private_copies()) { 938 // Keep the address of the original variable for future update at the end 939 // of the loop. 940 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 941 // Taskloops do not require additional initialization, it is done in 942 // runtime support library. 943 if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { 944 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 945 PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address { 946 DeclRefExpr DRE( 947 const_cast<VarDecl *>(OrigVD), 948 /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( 949 OrigVD) != nullptr, 950 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 951 return EmitLValue(&DRE).getAddress(); 952 }); 953 // Check if the variable is also a firstprivate: in this case IInit is 954 // not generated. Initialization of this variable will happen in codegen 955 // for 'firstprivate' clause. 
956 if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) { 957 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 958 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 959 // Emit private VarDecl with copy init. 960 EmitDecl(*VD); 961 return GetAddrOfLocalVar(VD); 962 }); 963 assert(IsRegistered && 964 "lastprivate var already registered as private"); 965 (void)IsRegistered; 966 } 967 } 968 ++IRef; 969 ++IDestRef; 970 } 971 } 972 return HasAtLeastOneLastprivate; 973 } 974 975 void CodeGenFunction::EmitOMPLastprivateClauseFinal( 976 const OMPExecutableDirective &D, bool NoFinals, 977 llvm::Value *IsLastIterCond) { 978 if (!HaveInsertPoint()) 979 return; 980 // Emit following code: 981 // if (<IsLastIterCond>) { 982 // orig_var1 = private_orig_var1; 983 // ... 984 // orig_varn = private_orig_varn; 985 // } 986 llvm::BasicBlock *ThenBB = nullptr; 987 llvm::BasicBlock *DoneBB = nullptr; 988 if (IsLastIterCond) { 989 ThenBB = createBasicBlock(".omp.lastprivate.then"); 990 DoneBB = createBasicBlock(".omp.lastprivate.done"); 991 Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB); 992 EmitBlock(ThenBB); 993 } 994 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; 995 llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates; 996 if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) { 997 auto IC = LoopDirective->counters().begin(); 998 for (auto F : LoopDirective->finals()) { 999 auto *D = 1000 cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl(); 1001 if (NoFinals) 1002 AlreadyEmittedVars.insert(D); 1003 else 1004 LoopCountersAndUpdates[D] = F; 1005 ++IC; 1006 } 1007 } 1008 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 1009 auto IRef = C->varlist_begin(); 1010 auto ISrcRef = C->source_exprs().begin(); 1011 auto IDestRef = C->destination_exprs().begin(); 1012 for (auto *AssignOp : C->assignment_ops()) { 1013 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 1014 
QualType Type = PrivateVD->getType(); 1015 auto *CanonicalVD = PrivateVD->getCanonicalDecl(); 1016 if (AlreadyEmittedVars.insert(CanonicalVD).second) { 1017 // If lastprivate variable is a loop control variable for loop-based 1018 // directive, update its value before copyin back to original 1019 // variable. 1020 if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) 1021 EmitIgnoredExpr(FinalExpr); 1022 auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 1023 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 1024 // Get the address of the original variable. 1025 Address OriginalAddr = GetAddrOfLocalVar(DestVD); 1026 // Get the address of the private variable. 1027 Address PrivateAddr = GetAddrOfLocalVar(PrivateVD); 1028 if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>()) 1029 PrivateAddr = 1030 Address(Builder.CreateLoad(PrivateAddr), 1031 getNaturalTypeAlignment(RefTy->getPointeeType())); 1032 EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp); 1033 } 1034 ++IRef; 1035 ++ISrcRef; 1036 ++IDestRef; 1037 } 1038 if (auto *PostUpdate = C->getPostUpdateExpr()) 1039 EmitIgnoredExpr(PostUpdate); 1040 } 1041 if (IsLastIterCond) 1042 EmitBlock(DoneBB, /*IsFinished=*/true); 1043 } 1044 1045 void CodeGenFunction::EmitOMPReductionClauseInit( 1046 const OMPExecutableDirective &D, 1047 CodeGenFunction::OMPPrivateScope &PrivateScope) { 1048 if (!HaveInsertPoint()) 1049 return; 1050 SmallVector<const Expr *, 4> Shareds; 1051 SmallVector<const Expr *, 4> Privates; 1052 SmallVector<const Expr *, 4> ReductionOps; 1053 SmallVector<const Expr *, 4> LHSs; 1054 SmallVector<const Expr *, 4> RHSs; 1055 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1056 auto IPriv = C->privates().begin(); 1057 auto IRed = C->reduction_ops().begin(); 1058 auto ILHS = C->lhs_exprs().begin(); 1059 auto IRHS = C->rhs_exprs().begin(); 1060 for (const auto *Ref : C->varlists()) { 1061 Shareds.emplace_back(Ref); 
1062 Privates.emplace_back(*IPriv); 1063 ReductionOps.emplace_back(*IRed); 1064 LHSs.emplace_back(*ILHS); 1065 RHSs.emplace_back(*IRHS); 1066 std::advance(IPriv, 1); 1067 std::advance(IRed, 1); 1068 std::advance(ILHS, 1); 1069 std::advance(IRHS, 1); 1070 } 1071 } 1072 ReductionCodeGen RedCG(Shareds, Privates, ReductionOps); 1073 unsigned Count = 0; 1074 auto ILHS = LHSs.begin(); 1075 auto IRHS = RHSs.begin(); 1076 auto IPriv = Privates.begin(); 1077 for (const auto *IRef : Shareds) { 1078 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); 1079 // Emit private VarDecl with reduction init. 1080 RedCG.emitSharedLValue(*this, Count); 1081 RedCG.emitAggregateType(*this, Count); 1082 auto Emission = EmitAutoVarAlloca(*PrivateVD); 1083 RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), 1084 RedCG.getSharedLValue(Count), 1085 [&Emission](CodeGenFunction &CGF) { 1086 CGF.EmitAutoVarInit(Emission); 1087 return true; 1088 }); 1089 EmitAutoVarCleanups(Emission); 1090 Address BaseAddr = RedCG.adjustPrivateAddress( 1091 *this, Count, Emission.getAllocatedAddress()); 1092 bool IsRegistered = PrivateScope.addPrivate( 1093 RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; }); 1094 assert(IsRegistered && "private var already registered as private"); 1095 // Silence the warning about unused variable. 1096 (void)IsRegistered; 1097 1098 auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 1099 auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 1100 QualType Type = PrivateVD->getType(); 1101 bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef); 1102 if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) { 1103 // Store the address of the original variable associated with the LHS 1104 // implicit variable. 
1105 PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { 1106 return RedCG.getSharedLValue(Count).getAddress(); 1107 }); 1108 PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { 1109 return GetAddrOfLocalVar(PrivateVD); 1110 }); 1111 } else if ((isaOMPArraySectionExpr && Type->isScalarType()) || 1112 isa<ArraySubscriptExpr>(IRef)) { 1113 // Store the address of the original variable associated with the LHS 1114 // implicit variable. 1115 PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { 1116 return RedCG.getSharedLValue(Count).getAddress(); 1117 }); 1118 PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { 1119 return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD), 1120 ConvertTypeForMem(RHSVD->getType()), 1121 "rhs.begin"); 1122 }); 1123 } else { 1124 QualType Type = PrivateVD->getType(); 1125 bool IsArray = getContext().getAsArrayType(Type) != nullptr; 1126 Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(); 1127 // Store the address of the original variable associated with the LHS 1128 // implicit variable. 1129 if (IsArray) { 1130 OriginalAddr = Builder.CreateElementBitCast( 1131 OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); 1132 } 1133 PrivateScope.addPrivate( 1134 LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; }); 1135 PrivateScope.addPrivate( 1136 RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address { 1137 return IsArray 1138 ? 
Builder.CreateElementBitCast( 1139 GetAddrOfLocalVar(PrivateVD), 1140 ConvertTypeForMem(RHSVD->getType()), "rhs.begin") 1141 : GetAddrOfLocalVar(PrivateVD); 1142 }); 1143 } 1144 ++ILHS; 1145 ++IRHS; 1146 ++IPriv; 1147 ++Count; 1148 } 1149 } 1150 1151 void CodeGenFunction::EmitOMPReductionClauseFinal( 1152 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) { 1153 if (!HaveInsertPoint()) 1154 return; 1155 llvm::SmallVector<const Expr *, 8> Privates; 1156 llvm::SmallVector<const Expr *, 8> LHSExprs; 1157 llvm::SmallVector<const Expr *, 8> RHSExprs; 1158 llvm::SmallVector<const Expr *, 8> ReductionOps; 1159 bool HasAtLeastOneReduction = false; 1160 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1161 HasAtLeastOneReduction = true; 1162 Privates.append(C->privates().begin(), C->privates().end()); 1163 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 1164 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 1165 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 1166 } 1167 if (HasAtLeastOneReduction) { 1168 bool WithNowait = D.getSingleClause<OMPNowaitClause>() || 1169 isOpenMPParallelDirective(D.getDirectiveKind()) || 1170 ReductionKind == OMPD_simd; 1171 bool SimpleReduction = ReductionKind == OMPD_simd; 1172 // Emit nowait reduction if nowait clause is present or directive is a 1173 // parallel directive (it always has implicit barrier). 
1174 CGM.getOpenMPRuntime().emitReduction( 1175 *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps, 1176 {WithNowait, SimpleReduction, ReductionKind}); 1177 } 1178 } 1179 1180 static void emitPostUpdateForReductionClause( 1181 CodeGenFunction &CGF, const OMPExecutableDirective &D, 1182 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1183 if (!CGF.HaveInsertPoint()) 1184 return; 1185 llvm::BasicBlock *DoneBB = nullptr; 1186 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1187 if (auto *PostUpdate = C->getPostUpdateExpr()) { 1188 if (!DoneBB) { 1189 if (auto *Cond = CondGen(CGF)) { 1190 // If the first post-update expression is found, emit conditional 1191 // block if it was requested. 1192 auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu"); 1193 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done"); 1194 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1195 CGF.EmitBlock(ThenBB); 1196 } 1197 } 1198 CGF.EmitIgnoredExpr(PostUpdate); 1199 } 1200 } 1201 if (DoneBB) 1202 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 1203 } 1204 1205 namespace { 1206 /// Codegen lambda for appending distribute lower and upper bounds to outlined 1207 /// parallel function. 
This is necessary for combined constructs such as 1208 /// 'distribute parallel for' 1209 typedef llvm::function_ref<void(CodeGenFunction &, 1210 const OMPExecutableDirective &, 1211 llvm::SmallVectorImpl<llvm::Value *> &)> 1212 CodeGenBoundParametersTy; 1213 } // anonymous namespace 1214 1215 static void emitCommonOMPParallelDirective( 1216 CodeGenFunction &CGF, const OMPExecutableDirective &S, 1217 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1218 const CodeGenBoundParametersTy &CodeGenBoundParameters) { 1219 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); 1220 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( 1221 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 1222 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { 1223 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 1224 auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 1225 /*IgnoreResultAssign*/ true); 1226 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( 1227 CGF, NumThreads, NumThreadsClause->getLocStart()); 1228 } 1229 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { 1230 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); 1231 CGF.CGM.getOpenMPRuntime().emitProcBindClause( 1232 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart()); 1233 } 1234 const Expr *IfCond = nullptr; 1235 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 1236 if (C->getNameModifier() == OMPD_unknown || 1237 C->getNameModifier() == OMPD_parallel) { 1238 IfCond = C->getCondition(); 1239 break; 1240 } 1241 } 1242 1243 OMPParallelScope Scope(CGF, S); 1244 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 1245 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk 1246 // lower and upper bounds with the pragma 'for' chunking mechanism. 
1247 // The following lambda takes care of appending the lower and upper bound 1248 // parameters when necessary 1249 CodeGenBoundParameters(CGF, S, CapturedVars); 1250 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 1251 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, 1252 CapturedVars, IfCond); 1253 } 1254 1255 static void emitEmptyBoundParameters(CodeGenFunction &, 1256 const OMPExecutableDirective &, 1257 llvm::SmallVectorImpl<llvm::Value *> &) {} 1258 1259 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { 1260 // Emit parallel region as a standalone region. 1261 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1262 OMPPrivateScope PrivateScope(CGF); 1263 bool Copyins = CGF.EmitOMPCopyinClause(S); 1264 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 1265 if (Copyins) { 1266 // Emit implicit barrier to synchronize threads and avoid data races on 1267 // propagation master's thread values of threadprivate variables to local 1268 // instances of that variables of all other implicit threads. 1269 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 1270 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 1271 /*ForceSimpleCall=*/true); 1272 } 1273 CGF.EmitOMPPrivateClause(S, PrivateScope); 1274 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 1275 (void)PrivateScope.Privatize(); 1276 CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt()); 1277 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 1278 }; 1279 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, 1280 emitEmptyBoundParameters); 1281 emitPostUpdateForReductionClause( 1282 *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1283 } 1284 1285 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, 1286 JumpDest LoopExit) { 1287 RunCleanupsScope BodyScope(*this); 1288 // Update counters values on current iteration. 
1289 for (auto I : D.updates()) { 1290 EmitIgnoredExpr(I); 1291 } 1292 // Update the linear variables. 1293 // In distribute directives only loop counters may be marked as linear, no 1294 // need to generate the code for them. 1295 if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { 1296 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1297 for (auto *U : C->updates()) 1298 EmitIgnoredExpr(U); 1299 } 1300 } 1301 1302 // On a continue in the body, jump to the end. 1303 auto Continue = getJumpDestInCurrentScope("omp.body.continue"); 1304 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1305 // Emit loop body. 1306 EmitStmt(D.getBody()); 1307 // The end (updates/cleanups). 1308 EmitBlock(Continue.getBlock()); 1309 BreakContinueStack.pop_back(); 1310 } 1311 1312 void CodeGenFunction::EmitOMPInnerLoop( 1313 const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, 1314 const Expr *IncExpr, 1315 const llvm::function_ref<void(CodeGenFunction &)> &BodyGen, 1316 const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) { 1317 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); 1318 1319 // Start the loop with a block that tests the condition. 1320 auto CondBlock = createBasicBlock("omp.inner.for.cond"); 1321 EmitBlock(CondBlock); 1322 const SourceRange &R = S.getSourceRange(); 1323 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1324 SourceLocToDebugLoc(R.getEnd())); 1325 1326 // If there are any cleanups between here and the loop-exit scope, 1327 // create a block to stage a loop exit along. 1328 auto ExitBlock = LoopExit.getBlock(); 1329 if (RequiresCleanup) 1330 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); 1331 1332 auto LoopBody = createBasicBlock("omp.inner.for.body"); 1333 1334 // Emit condition. 
1335 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); 1336 if (ExitBlock != LoopExit.getBlock()) { 1337 EmitBlock(ExitBlock); 1338 EmitBranchThroughCleanup(LoopExit); 1339 } 1340 1341 EmitBlock(LoopBody); 1342 incrementProfileCounter(&S); 1343 1344 // Create a block for the increment. 1345 auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); 1346 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1347 1348 BodyGen(*this); 1349 1350 // Emit "IV = IV + 1" and a back-edge to the condition block. 1351 EmitBlock(Continue.getBlock()); 1352 EmitIgnoredExpr(IncExpr); 1353 PostIncGen(*this); 1354 BreakContinueStack.pop_back(); 1355 EmitBranch(CondBlock); 1356 LoopStack.pop(); 1357 // Emit the fall-through block. 1358 EmitBlock(LoopExit.getBlock()); 1359 } 1360 1361 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { 1362 if (!HaveInsertPoint()) 1363 return false; 1364 // Emit inits for the linear variables. 1365 bool HasLinears = false; 1366 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1367 for (auto *Init : C->inits()) { 1368 HasLinears = true; 1369 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); 1370 if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { 1371 AutoVarEmission Emission = EmitAutoVarAlloca(*VD); 1372 auto *OrigVD = cast<VarDecl>(Ref->getDecl()); 1373 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 1374 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1375 VD->getInit()->getType(), VK_LValue, 1376 VD->getInit()->getExprLoc()); 1377 EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), 1378 VD->getType()), 1379 /*capturedByInit=*/false); 1380 EmitAutoVarCleanups(Emission); 1381 } else 1382 EmitVarDecl(*VD); 1383 } 1384 // Emit the linear steps for the linear clauses. 1385 // If a step is not constant, it is pre-calculated before the loop. 
1386 if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep())) 1387 if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) { 1388 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); 1389 // Emit calculation of the linear step. 1390 EmitIgnoredExpr(CS); 1391 } 1392 } 1393 return HasLinears; 1394 } 1395 1396 void CodeGenFunction::EmitOMPLinearClauseFinal( 1397 const OMPLoopDirective &D, 1398 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1399 if (!HaveInsertPoint()) 1400 return; 1401 llvm::BasicBlock *DoneBB = nullptr; 1402 // Emit the final values of the linear variables. 1403 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1404 auto IC = C->varlist_begin(); 1405 for (auto *F : C->finals()) { 1406 if (!DoneBB) { 1407 if (auto *Cond = CondGen(*this)) { 1408 // If the first post-update expression is found, emit conditional 1409 // block if it was requested. 1410 auto *ThenBB = createBasicBlock(".omp.linear.pu"); 1411 DoneBB = createBasicBlock(".omp.linear.pu.done"); 1412 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1413 EmitBlock(ThenBB); 1414 } 1415 } 1416 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); 1417 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 1418 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1419 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); 1420 Address OrigAddr = EmitLValue(&DRE).getAddress(); 1421 CodeGenFunction::OMPPrivateScope VarScope(*this); 1422 VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; }); 1423 (void)VarScope.Privatize(); 1424 EmitIgnoredExpr(F); 1425 ++IC; 1426 } 1427 if (auto *PostUpdate = C->getPostUpdateExpr()) 1428 EmitIgnoredExpr(PostUpdate); 1429 } 1430 if (DoneBB) 1431 EmitBlock(DoneBB, /*IsFinished=*/true); 1432 } 1433 1434 static void emitAlignedClause(CodeGenFunction &CGF, 1435 const OMPExecutableDirective &D) { 1436 if (!CGF.HaveInsertPoint()) 1437 return; 1438 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { 1439 unsigned 
ClauseAlignment = 0; 1440 if (auto AlignmentExpr = Clause->getAlignment()) { 1441 auto AlignmentCI = 1442 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); 1443 ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue()); 1444 } 1445 for (auto E : Clause->varlists()) { 1446 unsigned Alignment = ClauseAlignment; 1447 if (Alignment == 0) { 1448 // OpenMP [2.8.1, Description] 1449 // If no optional parameter is specified, implementation-defined default 1450 // alignments for SIMD instructions on the target platforms are assumed. 1451 Alignment = 1452 CGF.getContext() 1453 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( 1454 E->getType()->getPointeeType())) 1455 .getQuantity(); 1456 } 1457 assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) && 1458 "alignment is not power of 2"); 1459 if (Alignment != 0) { 1460 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 1461 CGF.EmitAlignmentAssumption(PtrValue, Alignment); 1462 } 1463 } 1464 } 1465 } 1466 1467 void CodeGenFunction::EmitOMPPrivateLoopCounters( 1468 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { 1469 if (!HaveInsertPoint()) 1470 return; 1471 auto I = S.private_counters().begin(); 1472 for (auto *E : S.counters()) { 1473 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1474 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 1475 (void)LoopScope.addPrivate(VD, [&]() -> Address { 1476 // Emit var without initialization. 
1477 if (!LocalDeclMap.count(PrivateVD)) { 1478 auto VarEmission = EmitAutoVarAlloca(*PrivateVD); 1479 EmitAutoVarCleanups(VarEmission); 1480 } 1481 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), 1482 /*RefersToEnclosingVariableOrCapture=*/false, 1483 (*I)->getType(), VK_LValue, (*I)->getExprLoc()); 1484 return EmitLValue(&DRE).getAddress(); 1485 }); 1486 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || 1487 VD->hasGlobalStorage()) { 1488 (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address { 1489 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 1490 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), 1491 E->getType(), VK_LValue, E->getExprLoc()); 1492 return EmitLValue(&DRE).getAddress(); 1493 }); 1494 } 1495 ++I; 1496 } 1497 } 1498 1499 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 1500 const Expr *Cond, llvm::BasicBlock *TrueBlock, 1501 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 1502 if (!CGF.HaveInsertPoint()) 1503 return; 1504 { 1505 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 1506 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); 1507 (void)PreCondScope.Privatize(); 1508 // Get initial values of real counters. 1509 for (auto I : S.inits()) { 1510 CGF.EmitIgnoredExpr(I); 1511 } 1512 } 1513 // Check that loop is executed at least one time. 
1514 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); 1515 } 1516 1517 void CodeGenFunction::EmitOMPLinearClause( 1518 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { 1519 if (!HaveInsertPoint()) 1520 return; 1521 llvm::DenseSet<const VarDecl *> SIMDLCVs; 1522 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 1523 auto *LoopDirective = cast<OMPLoopDirective>(&D); 1524 for (auto *C : LoopDirective->counters()) { 1525 SIMDLCVs.insert( 1526 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 1527 } 1528 } 1529 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1530 auto CurPrivate = C->privates().begin(); 1531 for (auto *E : C->varlists()) { 1532 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1533 auto *PrivateVD = 1534 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); 1535 if (!SIMDLCVs.count(VD->getCanonicalDecl())) { 1536 bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address { 1537 // Emit private VarDecl with copy init. 1538 EmitVarDecl(*PrivateVD); 1539 return GetAddrOfLocalVar(PrivateVD); 1540 }); 1541 assert(IsRegistered && "linear var already registered as private"); 1542 // Silence the warning about unused variable. 
1543 (void)IsRegistered; 1544 } else 1545 EmitVarDecl(*PrivateVD); 1546 ++CurPrivate; 1547 } 1548 } 1549 } 1550 1551 static void emitSimdlenSafelenClause(CodeGenFunction &CGF, 1552 const OMPExecutableDirective &D, 1553 bool IsMonotonic) { 1554 if (!CGF.HaveInsertPoint()) 1555 return; 1556 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { 1557 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 1558 /*ignoreResult=*/true); 1559 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1560 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1561 // In presence of finite 'safelen', it may be unsafe to mark all 1562 // the memory instructions parallel, because loop-carried 1563 // dependences of 'safelen' iterations are possible. 1564 if (!IsMonotonic) 1565 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); 1566 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { 1567 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), 1568 /*ignoreResult=*/true); 1569 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1570 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1571 // In presence of finite 'safelen', it may be unsafe to mark all 1572 // the memory instructions parallel, because loop-carried 1573 // dependences of 'safelen' iterations are possible. 1574 CGF.LoopStack.setParallel(false); 1575 } 1576 } 1577 1578 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, 1579 bool IsMonotonic) { 1580 // Walk clauses and process safelen/lastprivate. 
LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}

/// Emit the final-value expressions (D.finals()) of the loop counters of \p D.
///
/// The finals are walked in lock-step with D.counters() and
/// D.private_counters(). A final is emitted only if the original counter
/// variable is present in LocalDeclMap, is found by CapturedStmtInfo, has
/// global storage, or is an OMPCapturedExprDecl. While the final expression is
/// emitted, the original variable is temporarily remapped through an
/// OMPPrivateScope.
///
/// \param CondGen Invoked once, right before the first final that is actually
/// emitted. If it returns a non-null value, all emitted finals are placed in a
/// ".omp.final.then" block guarded by that value; if it returns null the finals
/// are emitted unconditionally.
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  // Stays null unless CondGen requested a conditional block.
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    // NOTE(review): CapturedStmtInfo is dereferenced without a null check
    // here; presumably it is always set when this function is reached -
    // confirm against the callers.
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      // Address that OrigVD is mapped to while the final expression is
      // emitted: the captured expression's initializer for an
      // OMPCapturedExprDecl, otherwise the lvalue of the private counter.
      Address OrigAddr = Address::invalid();
      if (CED)
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      else {
        DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Loop-body callback: emit the body of \p S with \p LoopExit as its exit
/// destination, followed by a stop point for \p S.
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}

/// Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

/// Emit the body of a simd loop directive \p S.
///
/// Runs \p Action's Enter hook, emits the precondition guard (skipping the
/// loop entirely when the precondition constant-folds to false), the
/// iteration and iteration-count variables, the clause initialization, the
/// inner loop, and finally the clause finalization.
static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
         "Expected simd directive");
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }
  //
  // For distribute/worksharing/taskloop flavours the lower and upper bound
  // helper variables are emitted as well.
  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
      isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
      isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
  }

  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
  bool CondConstant;
  // Non-null only when a runtime precondition check was emitted.
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
    if (!CondConstant)
      return;
  } else {
    auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
    ContBlock = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(ThenBlock);
    CGF.incrementProfileCounter(&S);
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGF.EmitOMPSimdInit(S);

  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // The condition generators below return null, so the final updates are
    // emitted unconditionally.
    CGF.EmitOMPSimdFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(
        CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
  }
  CGF.EmitOMPLinearClauseFinal(
      S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}

/// Emit a 'simd' directive as an inlined OMPD_simd region whose body is
/// produced by emitOMPSimdRegion.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

/// Emit the outer (chunk-dispatching) loop around the inner loop of \p S.
///
/// \param DynamicOrOrdered If true, the next chunk is requested from the
/// runtime through emitForNext and "IV = LB" is emitted inside the dispatch
/// body. If false, the bounds are updated with LoopArgs.EUB / Init / Cond
/// before the test and advanced with LoopArgs.NextLB / NextUB after each
/// chunk, and emitForStaticFinish is emitted at the end.
/// \param IsMonotonic Forwarded (negated) to LoopStack.setParallel, or to
/// EmitOMPSimdInit for simd directives.
/// \param LoopScope Used only to query whether cleanups are required.
/// \param LoopArgs Bounds, stride, is-last flag and the loop expressions.
/// \param CodeGenLoop Emits the body of the inner loop.
/// \param CodeGenOrdered Second callback handed to EmitOMPInnerLoop; it
/// receives the directive's start location and the size and signedness of the
/// iteration variable.
void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  auto &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    BoolCondVal =
        RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();

  // when 'distribute' is not combined with a 'for':
  // while (idx <= UB) { BODY; ++idx; }
  // when 'distribute' is combined with a 'for'
  // (e.g. 'distribute parallel for')
  // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
  EmitOMPInnerLoop(
      S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
      [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
        CodeGenLoop(CGF, S, LoopExit);
      },
      [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
        CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  // Only the static path emits a finish call; the callback is a no-op when
  // DynamicOrOrdered is set.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}

/// Emit the outer loop of a worksharing loop \p S whose schedule needs one.
///
/// Initializes the runtime (emitForDispatchInit when the loop is ordered or
/// the schedule is dynamic, emitForStaticInit otherwise), builds the
/// OMPLoopArguments for the outer loop from \p LoopArgs and the expressions
/// stored in \p S, and delegates to EmitOMPOuterLoop.
///
/// \param Ordered Whether the loop is ordered. When set, the ordered callback
/// passed on to EmitOMPOuterLoop emits emitForOrderedIterationEnd.
/// \param CGDispatchBounds Produces the lower/upper bound values handed to
/// emitForDispatchInit.
void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered =
      Ordered || RT.isDynamic(ScheduleKind.Schedule);

  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
                                                             LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DipatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);
  }

  // Ordered callback for EmitOMPOuterLoop: emits the ordered-iteration-end
  // call for ordered loops and nothing otherwise.
  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}

/// Ordered callback that emits nothing; passed to EmitOMPOuterLoop by
/// EmitOMPDistributeOuterLoop.
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}

/// Emit the outer loop of a 'distribute' loop \p S.
///
/// Initializes the runtime with emitDistributeStaticInit, selects either the
/// plain or the "combined" loop expressions of \p S depending on
/// isOpenMPLoopBoundSharingDirective, and delegates to EmitOMPOuterLoop on the
/// static path (DynamicOrOrdered and IsMonotonic both false).
///
/// \param CodeGenLoopContent Emits the body of the inner loop.
void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {

  auto &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as an OMPForOuterLoop, except that schedule cannot be
  // dynamic
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);

  // for combined 'distribute' and 'for' the increment expression of distribute
  // is stored in DistInc. For 'distribute' alone, it is in Inc.
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();

  // this routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}

/// Emit the lower/upper bound helper variables of the inner 'for' of a
/// combined 'distribute' + 'for' construct and initialize them from the
/// previous (distribute) bounds, converted to the iteration variable's type.
/// Returns the {LB, UB} lvalues.
static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into the
  // current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevUpperBoundVariable()->getExprLoc());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}

/// if the 'for' loop has a dispatch schedule (e.g.
dynamic, guided) then 2053 /// we need to use the LB and UB expressions generated by the worksharing 2054 /// code generation support, whereas in non combined situations we would 2055 /// just emit 0 and the LastIteration expression 2056 /// This function is necessary due to the difference of the LB and UB 2057 /// types for the RT emission routines for 'for_static_init' and 2058 /// 'for_dispatch_init' 2059 static std::pair<llvm::Value *, llvm::Value *> 2060 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, 2061 const OMPExecutableDirective &S, 2062 Address LB, Address UB) { 2063 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2064 const Expr *IVExpr = LS.getIterationVariable(); 2065 // when implementing a dynamic schedule for a 'for' combined with a 2066 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop 2067 // is not normalized as each team only executes its own assigned 2068 // distribute chunk 2069 QualType IteratorTy = IVExpr->getType(); 2070 llvm::Value *LBVal = 2071 CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getLocStart()); 2072 llvm::Value *UBVal = 2073 CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getLocStart()); 2074 return {LBVal, UBVal}; 2075 } 2076 2077 static void emitDistributeParallelForDistributeInnerBoundParams( 2078 CodeGenFunction &CGF, const OMPExecutableDirective &S, 2079 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { 2080 const auto &Dir = cast<OMPLoopDirective>(S); 2081 LValue LB = 2082 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); 2083 auto LBCast = CGF.Builder.CreateIntCast( 2084 CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false); 2085 CapturedVars.push_back(LBCast); 2086 LValue UB = 2087 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); 2088 2089 auto UBCast = CGF.Builder.CreateIntCast( 2090 CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false); 2091 
CapturedVars.push_back(UBCast); 2092 } 2093 2094 static void 2095 emitInnerParallelForWhenCombined(CodeGenFunction &CGF, 2096 const OMPLoopDirective &S, 2097 CodeGenFunction::JumpDest LoopExit) { 2098 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, 2099 PrePostActionTy &) { 2100 bool HasCancel = false; 2101 if (!isOpenMPSimdDirective(S.getDirectiveKind())) { 2102 if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S)) 2103 HasCancel = D->hasCancel(); 2104 else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S)) 2105 HasCancel = D->hasCancel(); 2106 else if (const auto *D = 2107 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S)) 2108 HasCancel = D->hasCancel(); 2109 } 2110 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), 2111 HasCancel); 2112 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), 2113 emitDistributeParallelForInnerBounds, 2114 emitDistributeParallelForDispatchBounds); 2115 }; 2116 2117 emitCommonOMPParallelDirective( 2118 CGF, S, 2119 isOpenMPSimdDirective(S.getDirectiveKind()) ? 
OMPD_for_simd : OMPD_for, 2120 CGInlinedWorksharingLoop, 2121 emitDistributeParallelForDistributeInnerBoundParams); 2122 } 2123 2124 void CodeGenFunction::EmitOMPDistributeParallelForDirective( 2125 const OMPDistributeParallelForDirective &S) { 2126 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2127 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 2128 S.getDistInc()); 2129 }; 2130 OMPLexicalScope Scope(*this, S, OMPD_parallel); 2131 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); 2132 } 2133 2134 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( 2135 const OMPDistributeParallelForSimdDirective &S) { 2136 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2137 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 2138 S.getDistInc()); 2139 }; 2140 OMPLexicalScope Scope(*this, S, OMPD_parallel); 2141 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); 2142 } 2143 2144 void CodeGenFunction::EmitOMPDistributeSimdDirective( 2145 const OMPDistributeSimdDirective &S) { 2146 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2147 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 2148 }; 2149 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2150 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2151 } 2152 2153 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 2154 CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) { 2155 // Emit SPMD target parallel for region as a standalone region. 2156 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2157 emitOMPSimdRegion(CGF, S, Action); 2158 }; 2159 llvm::Function *Fn; 2160 llvm::Constant *Addr; 2161 // Emit target region as a standalone region. 
2162 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 2163 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 2164 assert(Fn && Addr && "Target device function emission failed."); 2165 } 2166 2167 void CodeGenFunction::EmitOMPTargetSimdDirective( 2168 const OMPTargetSimdDirective &S) { 2169 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2170 emitOMPSimdRegion(CGF, S, Action); 2171 }; 2172 emitCommonOMPTargetDirective(*this, S, CodeGen); 2173 } 2174 2175 namespace { 2176 struct ScheduleKindModifiersTy { 2177 OpenMPScheduleClauseKind Kind; 2178 OpenMPScheduleClauseModifier M1; 2179 OpenMPScheduleClauseModifier M2; 2180 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, 2181 OpenMPScheduleClauseModifier M1, 2182 OpenMPScheduleClauseModifier M2) 2183 : Kind(Kind), M1(M1), M2(M2) {} 2184 }; 2185 } // namespace 2186 2187 bool CodeGenFunction::EmitOMPWorksharingLoop( 2188 const OMPLoopDirective &S, Expr *EUB, 2189 const CodeGenLoopBoundsTy &CodeGenLoopBounds, 2190 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 2191 // Emit the loop iteration variable. 2192 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2193 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2194 EmitVarDecl(*IVDecl); 2195 2196 // Emit the iterations count variable. 2197 // If it is not a variable, Sema decided to calculate iterations count on each 2198 // iteration (e.g., it is foldable into a constant). 2199 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2200 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2201 // Emit calculation of the iterations count. 2202 EmitIgnoredExpr(S.getCalcLastIteration()); 2203 } 2204 2205 auto &RT = CGM.getOpenMPRuntime(); 2206 2207 bool HasLastprivateClause; 2208 // Check pre-condition. 2209 { 2210 OMPLoopScope PreInitScope(*this, S); 2211 // Skip the entire loop if we don't meet the precondition. 2212 // If the condition constant folds and can be elided, avoid emitting the 2213 // whole loop. 
2214 bool CondConstant; 2215 llvm::BasicBlock *ContBlock = nullptr; 2216 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2217 if (!CondConstant) 2218 return false; 2219 } else { 2220 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2221 ContBlock = createBasicBlock("omp.precond.end"); 2222 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2223 getProfileCount(&S)); 2224 EmitBlock(ThenBlock); 2225 incrementProfileCounter(&S); 2226 } 2227 2228 bool Ordered = false; 2229 if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 2230 if (OrderedClause->getNumForLoops()) 2231 RT.emitDoacrossInit(*this, S); 2232 else 2233 Ordered = true; 2234 } 2235 2236 llvm::DenseSet<const Expr *> EmittedFinals; 2237 emitAlignedClause(*this, S); 2238 bool HasLinears = EmitOMPLinearClauseInit(S); 2239 // Emit helper vars inits. 2240 2241 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); 2242 LValue LB = Bounds.first; 2243 LValue UB = Bounds.second; 2244 LValue ST = 2245 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2246 LValue IL = 2247 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2248 2249 // Emit 'then' code. 2250 { 2251 OMPPrivateScope LoopScope(*this); 2252 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) { 2253 // Emit implicit barrier to synchronize threads and avoid data races on 2254 // initialization of firstprivate variables and post-update of 2255 // lastprivate variables. 2256 CGM.getOpenMPRuntime().emitBarrierCall( 2257 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2258 /*ForceSimpleCall=*/true); 2259 } 2260 EmitOMPPrivateClause(S, LoopScope); 2261 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2262 EmitOMPReductionClauseInit(S, LoopScope); 2263 EmitOMPPrivateLoopCounters(S, LoopScope); 2264 EmitOMPLinearClause(S, LoopScope); 2265 (void)LoopScope.Privatize(); 2266 2267 // Detect the loop schedule kind and chunk. 
2268 llvm::Value *Chunk = nullptr; 2269 OpenMPScheduleTy ScheduleKind; 2270 if (auto *C = S.getSingleClause<OMPScheduleClause>()) { 2271 ScheduleKind.Schedule = C->getScheduleKind(); 2272 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2273 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2274 if (const auto *Ch = C->getChunkSize()) { 2275 Chunk = EmitScalarExpr(Ch); 2276 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2277 S.getIterationVariable()->getType(), 2278 S.getLocStart()); 2279 } 2280 } 2281 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2282 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2283 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2284 // If the static schedule kind is specified or if the ordered clause is 2285 // specified, and if no monotonic modifier is specified, the effect will 2286 // be as if the monotonic modifier was specified. 2287 if (RT.isStaticNonchunked(ScheduleKind.Schedule, 2288 /* Chunked */ Chunk != nullptr) && 2289 !Ordered) { 2290 if (isOpenMPSimdDirective(S.getDirectiveKind())) 2291 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2292 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2293 // When no chunk_size is specified, the iteration space is divided into 2294 // chunks that are approximately equal in size, and at most one chunk is 2295 // distributed to each thread. Note that the size of the chunks is 2296 // unspecified in this case. 
2297 CGOpenMPRuntime::StaticRTInput StaticInit( 2298 IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(), 2299 UB.getAddress(), ST.getAddress()); 2300 RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(), 2301 ScheduleKind, StaticInit); 2302 auto LoopExit = 2303 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2304 // UB = min(UB, GlobalUB); 2305 EmitIgnoredExpr(S.getEnsureUpperBound()); 2306 // IV = LB; 2307 EmitIgnoredExpr(S.getInit()); 2308 // while (idx <= UB) { BODY; ++idx; } 2309 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 2310 S.getInc(), 2311 [&S, LoopExit](CodeGenFunction &CGF) { 2312 CGF.EmitOMPLoopBody(S, LoopExit); 2313 CGF.EmitStopPoint(&S); 2314 }, 2315 [](CodeGenFunction &) {}); 2316 EmitBlock(LoopExit.getBlock()); 2317 // Tell the runtime we are done. 2318 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2319 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), 2320 S.getDirectiveKind()); 2321 }; 2322 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2323 } else { 2324 const bool IsMonotonic = 2325 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || 2326 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || 2327 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 2328 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 2329 // Emit the outer loop, which requests its work chunk [LB..UB] from 2330 // runtime and runs the inner loop to process it. 
2331 const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), 2332 ST.getAddress(), IL.getAddress(), 2333 Chunk, EUB); 2334 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 2335 LoopArguments, CGDispatchBounds); 2336 } 2337 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2338 EmitOMPSimdFinal(S, 2339 [&](CodeGenFunction &CGF) -> llvm::Value * { 2340 return CGF.Builder.CreateIsNotNull( 2341 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2342 }); 2343 } 2344 EmitOMPReductionClauseFinal( 2345 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 2346 ? /*Parallel and Simd*/ OMPD_parallel_for_simd 2347 : /*Parallel only*/ OMPD_parallel); 2348 // Emit post-update of the reduction variables if IsLastIter != 0. 2349 emitPostUpdateForReductionClause( 2350 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2351 return CGF.Builder.CreateIsNotNull( 2352 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2353 }); 2354 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2355 if (HasLastprivateClause) 2356 EmitOMPLastprivateClauseFinal( 2357 S, isOpenMPSimdDirective(S.getDirectiveKind()), 2358 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 2359 } 2360 EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2361 return CGF.Builder.CreateIsNotNull( 2362 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2363 }); 2364 // We're now done with the loop, so jump to the continuation block. 2365 if (ContBlock) { 2366 EmitBranch(ContBlock); 2367 EmitBlock(ContBlock, true); 2368 } 2369 } 2370 return HasLastprivateClause; 2371 } 2372 2373 /// The following two functions generate expressions for the loop lower 2374 /// and upper bounds in case of static and dynamic (dispatch) schedule 2375 /// of the associated 'for' or 'distribute' loop. 
2376 static std::pair<LValue, LValue> 2377 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 2378 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2379 LValue LB = 2380 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2381 LValue UB = 2382 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2383 return {LB, UB}; 2384 } 2385 2386 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not 2387 /// consider the lower and upper bound expressions generated by the 2388 /// worksharing loop support, but we use 0 and the iteration space size as 2389 /// constants 2390 static std::pair<llvm::Value *, llvm::Value *> 2391 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, 2392 Address LB, Address UB) { 2393 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2394 const Expr *IVExpr = LS.getIterationVariable(); 2395 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); 2396 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); 2397 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); 2398 return {LBVal, UBVal}; 2399 } 2400 2401 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 2402 bool HasLastprivates = false; 2403 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2404 PrePostActionTy &) { 2405 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 2406 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2407 emitForLoopBounds, 2408 emitDispatchForLoopBounds); 2409 }; 2410 { 2411 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2412 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 2413 S.hasCancel()); 2414 } 2415 2416 // Emit an implicit barrier at the end. 
// The barrier is omitted only for 'nowait' without lastprivates.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

/// Emits the worksharing loop of \p S as an inlined OMPD_simd region, followed
/// by an OMPD_for barrier unless 'nowait' is present and the loop has no
/// lastprivates. No cancellation region is set up here.
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  // Set by the region body; consulted afterwards when deciding on the barrier.
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  };
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

/// Creates a memory temporary of type \p Ty named \p Name and wraps it in an
/// lvalue. When \p Init is non-null it is stored into the temporary as its
/// initial value.
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
  return LVal;
}

/// Emits the body of a sections-like directive \p S as a statically scheduled
/// loop over section indices; each iteration dispatches through a switch to
/// the statement of the corresponding section.
///
/// If the innermost captured statement is a CompoundStmt, every child becomes
/// one switch case; otherwise the statement itself is emitted as case 0.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *Stmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  const auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Global upper bound: index of the last section (0 for a single
    // non-compound statement).
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Opaque expressions bound to IV and UB so that the synthesized condition
    // and increment below can refer to them.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart(), true);
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getLocStart()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single statement that is not a compound statement: one case only.
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  // Only the two sections-like directive classes carry a cancel flag here; for
  // any other directive kind HasCancel stays false.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads after the final copy of
    // the lastprivate variables (this branch requires lastprivates).
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}

/// Emits the 'sections' directive \p S: the sections themselves via
/// EmitSections inside a lexical scope, then an implicit barrier.
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
2596 if (!S.getSingleClause<OMPNowaitClause>()) { 2597 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2598 OMPD_sections); 2599 } 2600 } 2601 2602 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 2603 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2604 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 2605 }; 2606 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2607 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 2608 S.hasCancel()); 2609 } 2610 2611 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 2612 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 2613 llvm::SmallVector<const Expr *, 8> DestExprs; 2614 llvm::SmallVector<const Expr *, 8> SrcExprs; 2615 llvm::SmallVector<const Expr *, 8> AssignmentOps; 2616 // Check if there are any 'copyprivate' clauses associated with this 2617 // 'single' construct. 2618 // Build a list of copyprivate variables along with helper expressions 2619 // (<source>, <destination>, <destination>=<source> expressions) 2620 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 2621 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 2622 DestExprs.append(C->destination_exprs().begin(), 2623 C->destination_exprs().end()); 2624 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 2625 AssignmentOps.append(C->assignment_ops().begin(), 2626 C->assignment_ops().end()); 2627 } 2628 // Emit code for 'single' region along with 'copyprivate' clauses 2629 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2630 Action.Enter(CGF); 2631 OMPPrivateScope SingleScope(CGF); 2632 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 2633 CGF.EmitOMPPrivateClause(S, SingleScope); 2634 (void)SingleScope.Privatize(); 2635 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 2636 }; 2637 { 2638 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2639 
CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 2640 CopyprivateVars, DestExprs, 2641 SrcExprs, AssignmentOps); 2642 } 2643 // Emit an implicit barrier at the end (to avoid data race on firstprivate 2644 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 2645 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 2646 CGM.getOpenMPRuntime().emitBarrierCall( 2647 *this, S.getLocStart(), 2648 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); 2649 } 2650 } 2651 2652 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 2653 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2654 Action.Enter(CGF); 2655 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 2656 }; 2657 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2658 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 2659 } 2660 2661 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 2662 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2663 Action.Enter(CGF); 2664 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 2665 }; 2666 Expr *Hint = nullptr; 2667 if (auto *HintClause = S.getSingleClause<OMPHintClause>()) 2668 Hint = HintClause->getHint(); 2669 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2670 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 2671 S.getDirectiveName().getAsString(), 2672 CodeGen, S.getLocStart(), Hint); 2673 } 2674 2675 void CodeGenFunction::EmitOMPParallelForDirective( 2676 const OMPParallelForDirective &S) { 2677 // Emit directive as a combined directive that consists of two implicit 2678 // directives: 'parallel' with 'for' directive. 
2679 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2680 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); 2681 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2682 emitDispatchForLoopBounds); 2683 }; 2684 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, 2685 emitEmptyBoundParameters); 2686 } 2687 2688 void CodeGenFunction::EmitOMPParallelForSimdDirective( 2689 const OMPParallelForSimdDirective &S) { 2690 // Emit directive as a combined directive that consists of two implicit 2691 // directives: 'parallel' with 'for' directive. 2692 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2693 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2694 emitDispatchForLoopBounds); 2695 }; 2696 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, 2697 emitEmptyBoundParameters); 2698 } 2699 2700 void CodeGenFunction::EmitOMPParallelSectionsDirective( 2701 const OMPParallelSectionsDirective &S) { 2702 // Emit directive as a combined directive that consists of two implicit 2703 // directives: 'parallel' with 'sections' directive. 2704 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2705 CGF.EmitSections(S); 2706 }; 2707 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, 2708 emitEmptyBoundParameters); 2709 } 2710 2711 void CodeGenFunction::EmitOMPTaskBasedDirective( 2712 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, 2713 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, 2714 OMPTaskDataTy &Data) { 2715 // Emit outlined function for task construct. 
2716 const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion); 2717 auto *I = CS->getCapturedDecl()->param_begin(); 2718 auto *PartId = std::next(I); 2719 auto *TaskT = std::next(I, 4); 2720 // Check if the task is final 2721 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 2722 // If the condition constant folds and can be elided, try to avoid emitting 2723 // the condition and the dead arm of the if/else. 2724 auto *Cond = Clause->getCondition(); 2725 bool CondConstant; 2726 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 2727 Data.Final.setInt(CondConstant); 2728 else 2729 Data.Final.setPointer(EvaluateExprAsBool(Cond)); 2730 } else { 2731 // By default the task is not final. 2732 Data.Final.setInt(/*IntVal=*/false); 2733 } 2734 // Check if the task has 'priority' clause. 2735 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 2736 auto *Prio = Clause->getPriority(); 2737 Data.Priority.setInt(/*IntVal=*/true); 2738 Data.Priority.setPointer(EmitScalarConversion( 2739 EmitScalarExpr(Prio), Prio->getType(), 2740 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 2741 Prio->getExprLoc())); 2742 } 2743 // The first function argument for tasks is a thread id, the second one is a 2744 // part id (0 for tied tasks, >=0 for untied task). 2745 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 2746 // Get list of private variables. 2747 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 2748 auto IRef = C->varlist_begin(); 2749 for (auto *IInit : C->private_copies()) { 2750 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2751 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2752 Data.PrivateVars.push_back(*IRef); 2753 Data.PrivateCopies.push_back(IInit); 2754 } 2755 ++IRef; 2756 } 2757 } 2758 EmittedAsPrivate.clear(); 2759 // Get list of firstprivate variables. 
2760 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 2761 auto IRef = C->varlist_begin(); 2762 auto IElemInitRef = C->inits().begin(); 2763 for (auto *IInit : C->private_copies()) { 2764 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2765 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2766 Data.FirstprivateVars.push_back(*IRef); 2767 Data.FirstprivateCopies.push_back(IInit); 2768 Data.FirstprivateInits.push_back(*IElemInitRef); 2769 } 2770 ++IRef; 2771 ++IElemInitRef; 2772 } 2773 } 2774 // Get list of lastprivate variables (for taskloops). 2775 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 2776 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 2777 auto IRef = C->varlist_begin(); 2778 auto ID = C->destination_exprs().begin(); 2779 for (auto *IInit : C->private_copies()) { 2780 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2781 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2782 Data.LastprivateVars.push_back(*IRef); 2783 Data.LastprivateCopies.push_back(IInit); 2784 } 2785 LastprivateDstsOrigs.insert( 2786 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 2787 cast<DeclRefExpr>(*IRef)}); 2788 ++IRef; 2789 ++ID; 2790 } 2791 } 2792 SmallVector<const Expr *, 4> LHSs; 2793 SmallVector<const Expr *, 4> RHSs; 2794 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 2795 auto IPriv = C->privates().begin(); 2796 auto IRed = C->reduction_ops().begin(); 2797 auto ILHS = C->lhs_exprs().begin(); 2798 auto IRHS = C->rhs_exprs().begin(); 2799 for (const auto *Ref : C->varlists()) { 2800 Data.ReductionVars.emplace_back(Ref); 2801 Data.ReductionCopies.emplace_back(*IPriv); 2802 Data.ReductionOps.emplace_back(*IRed); 2803 LHSs.emplace_back(*ILHS); 2804 RHSs.emplace_back(*IRHS); 2805 std::advance(IPriv, 1); 2806 std::advance(IRed, 1); 2807 std::advance(ILHS, 1); 2808 std::advance(IRHS, 1); 2809 } 2810 } 2811 Data.Reductions = 
CGM.getOpenMPRuntime().emitTaskReductionInit( 2812 *this, S.getLocStart(), LHSs, RHSs, Data); 2813 // Build list of dependences. 2814 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) 2815 for (auto *IRef : C->varlists()) 2816 Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); 2817 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, 2818 CapturedRegion](CodeGenFunction &CGF, 2819 PrePostActionTy &Action) { 2820 // Set proper addresses for generated private copies. 2821 OMPPrivateScope Scope(CGF); 2822 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || 2823 !Data.LastprivateVars.empty()) { 2824 enum { PrivatesParam = 2, CopyFnParam = 3 }; 2825 auto *CopyFn = CGF.Builder.CreateLoad( 2826 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); 2827 auto *PrivatesPtr = CGF.Builder.CreateLoad( 2828 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); 2829 // Map privates. 2830 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 2831 llvm::SmallVector<llvm::Value *, 16> CallArgs; 2832 CallArgs.push_back(PrivatesPtr); 2833 for (auto *E : Data.PrivateVars) { 2834 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2835 Address PrivatePtr = CGF.CreateMemTemp( 2836 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); 2837 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2838 CallArgs.push_back(PrivatePtr.getPointer()); 2839 } 2840 for (auto *E : Data.FirstprivateVars) { 2841 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2842 Address PrivatePtr = 2843 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 2844 ".firstpriv.ptr.addr"); 2845 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2846 CallArgs.push_back(PrivatePtr.getPointer()); 2847 } 2848 for (auto *E : Data.LastprivateVars) { 2849 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2850 Address PrivatePtr = 2851 
CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 2852 ".lastpriv.ptr.addr"); 2853 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2854 CallArgs.push_back(PrivatePtr.getPointer()); 2855 } 2856 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), 2857 CopyFn, CallArgs); 2858 for (auto &&Pair : LastprivateDstsOrigs) { 2859 auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); 2860 DeclRefExpr DRE( 2861 const_cast<VarDecl *>(OrigVD), 2862 /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup( 2863 OrigVD) != nullptr, 2864 Pair.second->getType(), VK_LValue, Pair.second->getExprLoc()); 2865 Scope.addPrivate(Pair.first, [&CGF, &DRE]() { 2866 return CGF.EmitLValue(&DRE).getAddress(); 2867 }); 2868 } 2869 for (auto &&Pair : PrivatePtrs) { 2870 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 2871 CGF.getContext().getDeclAlign(Pair.first)); 2872 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 2873 } 2874 } 2875 if (Data.Reductions) { 2876 OMPLexicalScope LexScope(CGF, S, CapturedRegion); 2877 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies, 2878 Data.ReductionOps); 2879 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( 2880 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9))); 2881 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { 2882 RedCG.emitSharedLValue(CGF, Cnt); 2883 RedCG.emitAggregateType(CGF, Cnt); 2884 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( 2885 CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); 2886 Replacement = 2887 Address(CGF.EmitScalarConversion( 2888 Replacement.getPointer(), CGF.getContext().VoidPtrTy, 2889 CGF.getContext().getPointerType( 2890 Data.ReductionCopies[Cnt]->getType()), 2891 Data.ReductionCopies[Cnt]->getExprLoc()), 2892 Replacement.getAlignment()); 2893 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); 2894 Scope.addPrivate(RedCG.getBaseDecl(Cnt), 
2895 [Replacement]() { return Replacement; }); 2896 // FIXME: This must removed once the runtime library is fixed. 2897 // Emit required threadprivate variables for 2898 // initilizer/combiner/finalizer. 2899 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), 2900 RedCG, Cnt); 2901 } 2902 } 2903 // Privatize all private variables except for in_reduction items. 2904 (void)Scope.Privatize(); 2905 SmallVector<const Expr *, 4> InRedVars; 2906 SmallVector<const Expr *, 4> InRedPrivs; 2907 SmallVector<const Expr *, 4> InRedOps; 2908 SmallVector<const Expr *, 4> TaskgroupDescriptors; 2909 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { 2910 auto IPriv = C->privates().begin(); 2911 auto IRed = C->reduction_ops().begin(); 2912 auto ITD = C->taskgroup_descriptors().begin(); 2913 for (const auto *Ref : C->varlists()) { 2914 InRedVars.emplace_back(Ref); 2915 InRedPrivs.emplace_back(*IPriv); 2916 InRedOps.emplace_back(*IRed); 2917 TaskgroupDescriptors.emplace_back(*ITD); 2918 std::advance(IPriv, 1); 2919 std::advance(IRed, 1); 2920 std::advance(ITD, 1); 2921 } 2922 } 2923 // Privatize in_reduction items here, because taskgroup descriptors must be 2924 // privatized earlier. 2925 OMPPrivateScope InRedScope(CGF); 2926 if (!InRedVars.empty()) { 2927 ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps); 2928 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { 2929 RedCG.emitSharedLValue(CGF, Cnt); 2930 RedCG.emitAggregateType(CGF, Cnt); 2931 // The taskgroup descriptor variable is always implicit firstprivate and 2932 // privatized already during procoessing of the firstprivates. 
2933 llvm::Value *ReductionsPtr = 2934 CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]), 2935 TaskgroupDescriptors[Cnt]->getExprLoc()); 2936 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( 2937 CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); 2938 Replacement = Address( 2939 CGF.EmitScalarConversion( 2940 Replacement.getPointer(), CGF.getContext().VoidPtrTy, 2941 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), 2942 InRedPrivs[Cnt]->getExprLoc()), 2943 Replacement.getAlignment()); 2944 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); 2945 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), 2946 [Replacement]() { return Replacement; }); 2947 // FIXME: This must removed once the runtime library is fixed. 2948 // Emit required threadprivate variables for 2949 // initilizer/combiner/finalizer. 2950 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), 2951 RedCG, Cnt); 2952 } 2953 } 2954 (void)InRedScope.Privatize(); 2955 2956 Action.Enter(CGF); 2957 BodyGen(CGF); 2958 }; 2959 auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 2960 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, 2961 Data.NumberOfParts); 2962 OMPLexicalScope Scope(*this, S); 2963 TaskGen(*this, OutlinedFn, Data); 2964 } 2965 2966 static ImplicitParamDecl * 2967 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, 2968 QualType Ty, CapturedDecl *CD, 2969 SourceLocation Loc) { 2970 auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, 2971 ImplicitParamDecl::Other); 2972 auto *OrigRef = DeclRefExpr::Create( 2973 C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, 2974 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); 2975 auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, 2976 ImplicitParamDecl::Other); 2977 auto *PrivateRef = DeclRefExpr::Create( 2978 C, NestedNameSpecifierLoc(), 
SourceLocation(), PrivateVD, 2979 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); 2980 QualType ElemType = C.getBaseElementType(Ty); 2981 auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType, 2982 ImplicitParamDecl::Other); 2983 auto *InitRef = DeclRefExpr::Create( 2984 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, 2985 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); 2986 PrivateVD->setInitStyle(VarDecl::CInit); 2987 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, 2988 InitRef, /*BasePath=*/nullptr, 2989 VK_RValue)); 2990 Data.FirstprivateVars.emplace_back(OrigRef); 2991 Data.FirstprivateCopies.emplace_back(PrivateRef); 2992 Data.FirstprivateInits.emplace_back(InitRef); 2993 return OrigVD; 2994 } 2995 2996 void CodeGenFunction::EmitOMPTargetTaskBasedDirective( 2997 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, 2998 OMPTargetDataInfo &InputInfo) { 2999 // Emit outlined function for task construct. 3000 auto CS = S.getCapturedStmt(OMPD_task); 3001 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 3002 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 3003 auto *I = CS->getCapturedDecl()->param_begin(); 3004 auto *PartId = std::next(I); 3005 auto *TaskT = std::next(I, 4); 3006 OMPTaskDataTy Data; 3007 // The task is not final. 3008 Data.Final.setInt(/*IntVal=*/false); 3009 // Get list of firstprivate variables. 
3010 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 3011 auto IRef = C->varlist_begin(); 3012 auto IElemInitRef = C->inits().begin(); 3013 for (auto *IInit : C->private_copies()) { 3014 Data.FirstprivateVars.push_back(*IRef); 3015 Data.FirstprivateCopies.push_back(IInit); 3016 Data.FirstprivateInits.push_back(*IElemInitRef); 3017 ++IRef; 3018 ++IElemInitRef; 3019 } 3020 } 3021 OMPPrivateScope TargetScope(*this); 3022 VarDecl *BPVD = nullptr; 3023 VarDecl *PVD = nullptr; 3024 VarDecl *SVD = nullptr; 3025 if (InputInfo.NumberOfTargetItems > 0) { 3026 auto *CD = CapturedDecl::Create( 3027 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); 3028 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); 3029 QualType BaseAndPointersType = getContext().getConstantArrayType( 3030 getContext().VoidPtrTy, ArrSize, ArrayType::Normal, 3031 /*IndexTypeQuals=*/0); 3032 BPVD = createImplicitFirstprivateForType( 3033 getContext(), Data, BaseAndPointersType, CD, S.getLocStart()); 3034 PVD = createImplicitFirstprivateForType( 3035 getContext(), Data, BaseAndPointersType, CD, S.getLocStart()); 3036 QualType SizesType = getContext().getConstantArrayType( 3037 getContext().getSizeType(), ArrSize, ArrayType::Normal, 3038 /*IndexTypeQuals=*/0); 3039 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, 3040 S.getLocStart()); 3041 TargetScope.addPrivate( 3042 BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; }); 3043 TargetScope.addPrivate(PVD, 3044 [&InputInfo]() { return InputInfo.PointersArray; }); 3045 TargetScope.addPrivate(SVD, 3046 [&InputInfo]() { return InputInfo.SizesArray; }); 3047 } 3048 (void)TargetScope.Privatize(); 3049 // Build list of dependences. 
3050 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) 3051 for (auto *IRef : C->varlists()) 3052 Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); 3053 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, 3054 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { 3055 // Set proper addresses for generated private copies. 3056 OMPPrivateScope Scope(CGF); 3057 if (!Data.FirstprivateVars.empty()) { 3058 enum { PrivatesParam = 2, CopyFnParam = 3 }; 3059 auto *CopyFn = CGF.Builder.CreateLoad( 3060 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); 3061 auto *PrivatesPtr = CGF.Builder.CreateLoad( 3062 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); 3063 // Map privates. 3064 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 3065 llvm::SmallVector<llvm::Value *, 16> CallArgs; 3066 CallArgs.push_back(PrivatesPtr); 3067 for (auto *E : Data.FirstprivateVars) { 3068 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3069 Address PrivatePtr = 3070 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 3071 ".firstpriv.ptr.addr"); 3072 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 3073 CallArgs.push_back(PrivatePtr.getPointer()); 3074 } 3075 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), 3076 CopyFn, CallArgs); 3077 for (auto &&Pair : PrivatePtrs) { 3078 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 3079 CGF.getContext().getDeclAlign(Pair.first)); 3080 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 3081 } 3082 } 3083 // Privatize all private variables except for in_reduction items. 
3084 (void)Scope.Privatize(); 3085 if (InputInfo.NumberOfTargetItems > 0) { 3086 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( 3087 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize()); 3088 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( 3089 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize()); 3090 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( 3091 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize()); 3092 } 3093 3094 Action.Enter(CGF); 3095 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); 3096 BodyGen(CGF); 3097 }; 3098 auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 3099 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, 3100 Data.NumberOfParts); 3101 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0); 3102 IntegerLiteral IfCond(getContext(), TrueOrFalse, 3103 getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 3104 SourceLocation()); 3105 3106 CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), S, OutlinedFn, 3107 SharedsTy, CapturedStruct, &IfCond, Data); 3108 } 3109 3110 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 3111 // Emit outlined function for task construct. 3112 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); 3113 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 3114 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 3115 const Expr *IfCond = nullptr; 3116 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3117 if (C->getNameModifier() == OMPD_unknown || 3118 C->getNameModifier() == OMPD_task) { 3119 IfCond = C->getCondition(); 3120 break; 3121 } 3122 } 3123 3124 OMPTaskDataTy Data; 3125 // Check if we should emit tied or untied task. 
// The task is tied unless an 'untied' clause is present.
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  // The task body is the statement captured for the task region.
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  // Emits the runtime task call for the outlined function.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
}

/// Emits a 'taskyield' directive as a taskyield call of the OpenMP runtime.
void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

/// Emits a 'barrier' directive as a barrier call of kind OMPD_barrier.
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

/// Emits a 'taskwait' directive as a taskwait call of the OpenMP runtime.
void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
}

/// Emits a 'taskgroup' directive. When the directive has a reduction
/// reference, the task reduction descriptor is initialized from the
/// 'task_reduction' clauses and stored into the referenced variable before the
/// captured statement is emitted.
void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      // Collect the reduction items of all 'task_reduction' clauses.
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        auto IPriv = C->privates().begin();
        auto IRed = C->reduction_ops().begin();
        auto ILHS = C->lhs_exprs().begin();
        auto IRHS = C->rhs_exprs().begin();
        for (const auto *Ref : C->varlists()) {
          Data.ReductionVars.emplace_back(Ref);
          Data.ReductionCopies.emplace_back(*IPriv);
          Data.ReductionOps.emplace_back(*IRed);
          LHSs.emplace_back(*ILHS);
          RHSs.emplace_back(*IRHS);
          std::advance(IPriv, 1);
          std::advance(IRed, 1);
          std::advance(ILHS, 1);
          std::advance(IRHS, 1);
        }
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(),
                                                           LHSs, RHSs, Data);
      // Emit the variable behind the reduction reference and store the
      // descriptor into it.
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
}

/// Emits a 'flush' directive. The variable list of the flush clause, if one is
/// present, is passed to the runtime; otherwise an empty list is passed.
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
3226 // If the condition constant folds and can be elided, avoid emitting the 3227 // whole loop. 3228 bool CondConstant; 3229 llvm::BasicBlock *ContBlock = nullptr; 3230 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 3231 if (!CondConstant) 3232 return; 3233 } else { 3234 auto *ThenBlock = createBasicBlock("omp.precond.then"); 3235 ContBlock = createBasicBlock("omp.precond.end"); 3236 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 3237 getProfileCount(&S)); 3238 EmitBlock(ThenBlock); 3239 incrementProfileCounter(&S); 3240 } 3241 3242 emitAlignedClause(*this, S); 3243 // Emit 'then' code. 3244 { 3245 // Emit helper vars inits. 3246 3247 LValue LB = EmitOMPHelperVar( 3248 *this, cast<DeclRefExpr>( 3249 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3250 ? S.getCombinedLowerBoundVariable() 3251 : S.getLowerBoundVariable()))); 3252 LValue UB = EmitOMPHelperVar( 3253 *this, cast<DeclRefExpr>( 3254 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3255 ? S.getCombinedUpperBoundVariable() 3256 : S.getUpperBoundVariable()))); 3257 LValue ST = 3258 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 3259 LValue IL = 3260 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 3261 3262 OMPPrivateScope LoopScope(*this); 3263 if (EmitOMPFirstprivateClause(S, LoopScope)) { 3264 // Emit implicit barrier to synchronize threads and avoid data races 3265 // on initialization of firstprivate variables and post-update of 3266 // lastprivate variables. 
3267 CGM.getOpenMPRuntime().emitBarrierCall( 3268 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 3269 /*ForceSimpleCall=*/true); 3270 } 3271 EmitOMPPrivateClause(S, LoopScope); 3272 if (isOpenMPSimdDirective(S.getDirectiveKind()) && 3273 !isOpenMPParallelDirective(S.getDirectiveKind()) && 3274 !isOpenMPTeamsDirective(S.getDirectiveKind())) 3275 EmitOMPReductionClauseInit(S, LoopScope); 3276 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 3277 EmitOMPPrivateLoopCounters(S, LoopScope); 3278 (void)LoopScope.Privatize(); 3279 3280 // Detect the distribute schedule kind and chunk. 3281 llvm::Value *Chunk = nullptr; 3282 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; 3283 if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) { 3284 ScheduleKind = C->getDistScheduleKind(); 3285 if (const auto *Ch = C->getChunkSize()) { 3286 Chunk = EmitScalarExpr(Ch); 3287 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 3288 S.getIterationVariable()->getType(), 3289 S.getLocStart()); 3290 } 3291 } 3292 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 3293 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 3294 3295 // OpenMP [2.10.8, distribute Construct, Description] 3296 // If dist_schedule is specified, kind must be static. If specified, 3297 // iterations are divided into chunks of size chunk_size, chunks are 3298 // assigned to the teams of the league in a round-robin fashion in the 3299 // order of the team number. When no chunk_size is specified, the 3300 // iteration space is divided into chunks that are approximately equal 3301 // in size, and at most one chunk is distributed to each team of the 3302 // league. The size of the chunks is unspecified in this case. 
3303 if (RT.isStaticNonchunked(ScheduleKind, 3304 /* Chunked */ Chunk != nullptr)) { 3305 if (isOpenMPSimdDirective(S.getDirectiveKind())) 3306 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 3307 CGOpenMPRuntime::StaticRTInput StaticInit( 3308 IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(), 3309 LB.getAddress(), UB.getAddress(), ST.getAddress()); 3310 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, 3311 StaticInit); 3312 auto LoopExit = 3313 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 3314 // UB = min(UB, GlobalUB); 3315 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3316 ? S.getCombinedEnsureUpperBound() 3317 : S.getEnsureUpperBound()); 3318 // IV = LB; 3319 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3320 ? S.getCombinedInit() 3321 : S.getInit()); 3322 3323 Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3324 ? S.getCombinedCond() 3325 : S.getCond(); 3326 3327 // for distribute alone, codegen 3328 // while (idx <= UB) { BODY; ++idx; } 3329 // when combined with 'for' (e.g. as in 'distribute parallel for') 3330 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 3331 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr, 3332 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 3333 CodeGenLoop(CGF, S, LoopExit); 3334 }, 3335 [](CodeGenFunction &) {}); 3336 EmitBlock(LoopExit.getBlock()); 3337 // Tell the runtime we are done. 3338 RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind()); 3339 } else { 3340 // Emit the outer loop, which requests its work chunk [LB..UB] from 3341 // runtime and runs the inner loop to process it. 
3342 const OMPLoopArguments LoopArguments = { 3343 LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), 3344 Chunk}; 3345 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, 3346 CodeGenLoop); 3347 } 3348 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 3349 EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { 3350 return CGF.Builder.CreateIsNotNull( 3351 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 3352 }); 3353 } 3354 OpenMPDirectiveKind ReductionKind = OMPD_unknown; 3355 if (isOpenMPParallelDirective(S.getDirectiveKind()) && 3356 isOpenMPSimdDirective(S.getDirectiveKind())) { 3357 ReductionKind = OMPD_parallel_for_simd; 3358 } else if (isOpenMPParallelDirective(S.getDirectiveKind())) { 3359 ReductionKind = OMPD_parallel_for; 3360 } else if (isOpenMPSimdDirective(S.getDirectiveKind())) { 3361 ReductionKind = OMPD_simd; 3362 } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) && 3363 S.hasClausesOfKind<OMPReductionClause>()) { 3364 llvm_unreachable( 3365 "No reduction clauses is allowed in distribute directive."); 3366 } 3367 EmitOMPReductionClauseFinal(S, ReductionKind); 3368 // Emit post-update of the reduction variables if IsLastIter != 0. 3369 emitPostUpdateForReductionClause( 3370 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 3371 return CGF.Builder.CreateIsNotNull( 3372 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 3373 }); 3374 // Emit final copy of the lastprivate variables if IsLastIter != 0. 3375 if (HasLastprivateClause) { 3376 EmitOMPLastprivateClauseFinal( 3377 S, /*NoFinals=*/false, 3378 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 3379 } 3380 } 3381 3382 // We're now done with the loop, so jump to the continuation block. 
3383 if (ContBlock) { 3384 EmitBranch(ContBlock); 3385 EmitBlock(ContBlock, true); 3386 } 3387 } 3388 } 3389 3390 void CodeGenFunction::EmitOMPDistributeDirective( 3391 const OMPDistributeDirective &S) { 3392 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3393 3394 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 3395 }; 3396 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3397 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); 3398 } 3399 3400 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 3401 const CapturedStmt *S) { 3402 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 3403 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 3404 CGF.CapturedStmtInfo = &CapStmtInfo; 3405 auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); 3406 Fn->addFnAttr(llvm::Attribute::NoInline); 3407 return Fn; 3408 } 3409 3410 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 3411 if (S.hasClausesOfKind<OMPDependClause>()) { 3412 assert(!S.getAssociatedStmt() && 3413 "No associated statement must be in ordered depend construct."); 3414 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) 3415 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); 3416 return; 3417 } 3418 auto *C = S.getSingleClause<OMPSIMDClause>(); 3419 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, 3420 PrePostActionTy &Action) { 3421 const CapturedStmt *CS = S.getInnermostCapturedStmt(); 3422 if (C) { 3423 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3424 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3425 auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); 3426 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), 3427 OutlinedFn, CapturedVars); 3428 } else { 3429 Action.Enter(CGF); 3430 CGF.EmitStmt(CS->getCapturedStmt()); 3431 } 3432 }; 3433 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3434 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, 
S.getLocStart(), !C); 3435 } 3436 3437 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 3438 QualType SrcType, QualType DestType, 3439 SourceLocation Loc) { 3440 assert(CGF.hasScalarEvaluationKind(DestType) && 3441 "DestType must have scalar evaluation kind."); 3442 assert(!Val.isAggregate() && "Must be a scalar or complex."); 3443 return Val.isScalar() 3444 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, 3445 Loc) 3446 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 3447 DestType, Loc); 3448 } 3449 3450 static CodeGenFunction::ComplexPairTy 3451 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 3452 QualType DestType, SourceLocation Loc) { 3453 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 3454 "DestType must have complex evaluation kind."); 3455 CodeGenFunction::ComplexPairTy ComplexVal; 3456 if (Val.isScalar()) { 3457 // Convert the input element to the element type of the complex. 3458 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 3459 auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 3460 DestElementType, Loc); 3461 ComplexVal = CodeGenFunction::ComplexPairTy( 3462 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 3463 } else { 3464 assert(Val.isComplex() && "Must be a scalar or complex."); 3465 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 3466 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 3467 ComplexVal.first = CGF.EmitScalarConversion( 3468 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 3469 ComplexVal.second = CGF.EmitScalarConversion( 3470 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 3471 } 3472 return ComplexVal; 3473 } 3474 3475 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 3476 LValue LVal, RValue RVal) { 3477 if (LVal.isGlobalReg()) { 3478 
CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 3479 } else { 3480 CGF.EmitAtomicStore(RVal, LVal, 3481 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3482 : llvm::AtomicOrdering::Monotonic, 3483 LVal.isVolatile(), /*IsInit=*/false); 3484 } 3485 } 3486 3487 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, 3488 QualType RValTy, SourceLocation Loc) { 3489 switch (getEvaluationKind(LVal.getType())) { 3490 case TEK_Scalar: 3491 EmitStoreThroughLValue(RValue::get(convertToScalarValue( 3492 *this, RVal, RValTy, LVal.getType(), Loc)), 3493 LVal); 3494 break; 3495 case TEK_Complex: 3496 EmitStoreOfComplex( 3497 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, 3498 /*isInit=*/false); 3499 break; 3500 case TEK_Aggregate: 3501 llvm_unreachable("Must be a scalar or complex."); 3502 } 3503 } 3504 3505 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 3506 const Expr *X, const Expr *V, 3507 SourceLocation Loc) { 3508 // v = x; 3509 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 3510 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 3511 LValue XLValue = CGF.EmitLValue(X); 3512 LValue VLValue = CGF.EmitLValue(V); 3513 RValue Res = XLValue.isGlobalReg() 3514 ? CGF.EmitLoadOfLValue(XLValue, Loc) 3515 : CGF.EmitAtomicLoad( 3516 XLValue, Loc, 3517 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3518 : llvm::AtomicOrdering::Monotonic, 3519 XLValue.isVolatile()); 3520 // OpenMP, 2.12.6, atomic Construct 3521 // Any atomic construct with a seq_cst clause forces the atomically 3522 // performed operation to include an implicit flush operation without a 3523 // list. 
3524 if (IsSeqCst) 3525 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3526 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); 3527 } 3528 3529 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 3530 const Expr *X, const Expr *E, 3531 SourceLocation Loc) { 3532 // x = expr; 3533 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 3534 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); 3535 // OpenMP, 2.12.6, atomic Construct 3536 // Any atomic construct with a seq_cst clause forces the atomically 3537 // performed operation to include an implicit flush operation without a 3538 // list. 3539 if (IsSeqCst) 3540 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3541 } 3542 3543 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, 3544 RValue Update, 3545 BinaryOperatorKind BO, 3546 llvm::AtomicOrdering AO, 3547 bool IsXLHSInRHSPart) { 3548 auto &Context = CGF.CGM.getContext(); 3549 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' 3550 // expression is simple and atomic is allowed for the given type for the 3551 // target platform. 
3552 if (BO == BO_Comma || !Update.isScalar() || 3553 !Update.getScalarVal()->getType()->isIntegerTy() || 3554 !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) && 3555 (Update.getScalarVal()->getType() != 3556 X.getAddress().getElementType())) || 3557 !X.getAddress().getElementType()->isIntegerTy() || 3558 !Context.getTargetInfo().hasBuiltinAtomic( 3559 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) 3560 return std::make_pair(false, RValue::get(nullptr)); 3561 3562 llvm::AtomicRMWInst::BinOp RMWOp; 3563 switch (BO) { 3564 case BO_Add: 3565 RMWOp = llvm::AtomicRMWInst::Add; 3566 break; 3567 case BO_Sub: 3568 if (!IsXLHSInRHSPart) 3569 return std::make_pair(false, RValue::get(nullptr)); 3570 RMWOp = llvm::AtomicRMWInst::Sub; 3571 break; 3572 case BO_And: 3573 RMWOp = llvm::AtomicRMWInst::And; 3574 break; 3575 case BO_Or: 3576 RMWOp = llvm::AtomicRMWInst::Or; 3577 break; 3578 case BO_Xor: 3579 RMWOp = llvm::AtomicRMWInst::Xor; 3580 break; 3581 case BO_LT: 3582 RMWOp = X.getType()->hasSignedIntegerRepresentation() 3583 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min 3584 : llvm::AtomicRMWInst::Max) 3585 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin 3586 : llvm::AtomicRMWInst::UMax); 3587 break; 3588 case BO_GT: 3589 RMWOp = X.getType()->hasSignedIntegerRepresentation() 3590 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max 3591 : llvm::AtomicRMWInst::Min) 3592 : (IsXLHSInRHSPart ? 
llvm::AtomicRMWInst::UMax 3593 : llvm::AtomicRMWInst::UMin); 3594 break; 3595 case BO_Assign: 3596 RMWOp = llvm::AtomicRMWInst::Xchg; 3597 break; 3598 case BO_Mul: 3599 case BO_Div: 3600 case BO_Rem: 3601 case BO_Shl: 3602 case BO_Shr: 3603 case BO_LAnd: 3604 case BO_LOr: 3605 return std::make_pair(false, RValue::get(nullptr)); 3606 case BO_PtrMemD: 3607 case BO_PtrMemI: 3608 case BO_LE: 3609 case BO_GE: 3610 case BO_EQ: 3611 case BO_NE: 3612 case BO_Cmp: 3613 case BO_AddAssign: 3614 case BO_SubAssign: 3615 case BO_AndAssign: 3616 case BO_OrAssign: 3617 case BO_XorAssign: 3618 case BO_MulAssign: 3619 case BO_DivAssign: 3620 case BO_RemAssign: 3621 case BO_ShlAssign: 3622 case BO_ShrAssign: 3623 case BO_Comma: 3624 llvm_unreachable("Unsupported atomic update operation"); 3625 } 3626 auto *UpdateVal = Update.getScalarVal(); 3627 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) { 3628 UpdateVal = CGF.Builder.CreateIntCast( 3629 IC, X.getAddress().getElementType(), 3630 X.getType()->hasSignedIntegerRepresentation()); 3631 } 3632 auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO); 3633 return std::make_pair(true, RValue::get(Res)); 3634 } 3635 3636 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( 3637 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, 3638 llvm::AtomicOrdering AO, SourceLocation Loc, 3639 const llvm::function_ref<RValue(RValue)> &CommonGen) { 3640 // Update expressions are allowed to have the following forms: 3641 // x binop= expr; -> xrval + expr; 3642 // x++, ++x -> xrval + 1; 3643 // x--, --x -> xrval - 1; 3644 // x = x binop expr; -> xrval binop expr 3645 // x = expr Op x; - > expr binop xrval; 3646 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart); 3647 if (!Res.first) { 3648 if (X.isGlobalReg()) { 3649 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop 3650 // 'xrval'. 
3651 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X); 3652 } else { 3653 // Perform compare-and-swap procedure. 3654 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified()); 3655 } 3656 } 3657 return Res; 3658 } 3659 3660 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, 3661 const Expr *X, const Expr *E, 3662 const Expr *UE, bool IsXLHSInRHSPart, 3663 SourceLocation Loc) { 3664 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 3665 "Update expr in 'atomic update' must be a binary operator."); 3666 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 3667 // Update expressions are allowed to have the following forms: 3668 // x binop= expr; -> xrval + expr; 3669 // x++, ++x -> xrval + 1; 3670 // x--, --x -> xrval - 1; 3671 // x = x binop expr; -> xrval binop expr 3672 // x = expr Op x; - > expr binop xrval; 3673 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); 3674 LValue XLValue = CGF.EmitLValue(X); 3675 RValue ExprRValue = CGF.EmitAnyExpr(E); 3676 auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3677 : llvm::AtomicOrdering::Monotonic; 3678 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 3679 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 3680 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 3681 auto *ERValExpr = IsXLHSInRHSPart ? 
RHS : LHS; 3682 auto Gen = 3683 [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue { 3684 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3685 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 3686 return CGF.EmitAnyExpr(UE); 3687 }; 3688 (void)CGF.EmitOMPAtomicSimpleUpdateExpr( 3689 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 3690 // OpenMP, 2.12.6, atomic Construct 3691 // Any atomic construct with a seq_cst clause forces the atomically 3692 // performed operation to include an implicit flush operation without a 3693 // list. 3694 if (IsSeqCst) 3695 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3696 } 3697 3698 static RValue convertToType(CodeGenFunction &CGF, RValue Value, 3699 QualType SourceType, QualType ResType, 3700 SourceLocation Loc) { 3701 switch (CGF.getEvaluationKind(ResType)) { 3702 case TEK_Scalar: 3703 return RValue::get( 3704 convertToScalarValue(CGF, Value, SourceType, ResType, Loc)); 3705 case TEK_Complex: { 3706 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc); 3707 return RValue::getComplex(Res.first, Res.second); 3708 } 3709 case TEK_Aggregate: 3710 break; 3711 } 3712 llvm_unreachable("Must be a scalar or complex."); 3713 } 3714 3715 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, 3716 bool IsPostfixUpdate, const Expr *V, 3717 const Expr *X, const Expr *E, 3718 const Expr *UE, bool IsXLHSInRHSPart, 3719 SourceLocation Loc) { 3720 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue"); 3721 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue"); 3722 RValue NewVVal; 3723 LValue VLValue = CGF.EmitLValue(V); 3724 LValue XLValue = CGF.EmitLValue(X); 3725 RValue ExprRValue = CGF.EmitAnyExpr(E); 3726 auto AO = IsSeqCst ? 
llvm::AtomicOrdering::SequentiallyConsistent 3727 : llvm::AtomicOrdering::Monotonic; 3728 QualType NewVValType; 3729 if (UE) { 3730 // 'x' is updated with some additional value. 3731 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 3732 "Update expr in 'atomic capture' must be a binary operator."); 3733 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 3734 // Update expressions are allowed to have the following forms: 3735 // x binop= expr; -> xrval + expr; 3736 // x++, ++x -> xrval + 1; 3737 // x--, --x -> xrval - 1; 3738 // x = x binop expr; -> xrval binop expr 3739 // x = expr Op x; - > expr binop xrval; 3740 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 3741 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 3742 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 3743 NewVValType = XRValExpr->getType(); 3744 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 3745 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, 3746 IsPostfixUpdate](RValue XRValue) -> RValue { 3747 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3748 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 3749 RValue Res = CGF.EmitAnyExpr(UE); 3750 NewVVal = IsPostfixUpdate ? XRValue : Res; 3751 return Res; 3752 }; 3753 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3754 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 3755 if (Res.first) { 3756 // 'atomicrmw' instruction was generated. 3757 if (IsPostfixUpdate) { 3758 // Use old value from 'atomicrmw'. 3759 NewVVal = Res.second; 3760 } else { 3761 // 'atomicrmw' does not provide new value, so evaluate it using old 3762 // value of 'x'. 3763 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3764 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 3765 NewVVal = CGF.EmitAnyExpr(UE); 3766 } 3767 } 3768 } else { 3769 // 'x' is simply rewritten with some 'expr'. 
3770 NewVValType = X->getType().getNonReferenceType(); 3771 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 3772 X->getType().getNonReferenceType(), Loc); 3773 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue { 3774 NewVVal = XRValue; 3775 return ExprRValue; 3776 }; 3777 // Try to perform atomicrmw xchg, otherwise simple exchange. 3778 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3779 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 3780 Loc, Gen); 3781 if (Res.first) { 3782 // 'atomicrmw' instruction was generated. 3783 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 3784 } 3785 } 3786 // Emit post-update store to 'v' of old/new 'x' value. 3787 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); 3788 // OpenMP, 2.12.6, atomic Construct 3789 // Any atomic construct with a seq_cst clause forces the atomically 3790 // performed operation to include an implicit flush operation without a 3791 // list. 3792 if (IsSeqCst) 3793 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3794 } 3795 3796 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 3797 bool IsSeqCst, bool IsPostfixUpdate, 3798 const Expr *X, const Expr *V, const Expr *E, 3799 const Expr *UE, bool IsXLHSInRHSPart, 3800 SourceLocation Loc) { 3801 switch (Kind) { 3802 case OMPC_read: 3803 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 3804 break; 3805 case OMPC_write: 3806 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 3807 break; 3808 case OMPC_unknown: 3809 case OMPC_update: 3810 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 3811 break; 3812 case OMPC_capture: 3813 EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 3814 IsXLHSInRHSPart, Loc); 3815 break; 3816 case OMPC_if: 3817 case OMPC_final: 3818 case OMPC_num_threads: 3819 case OMPC_private: 3820 case OMPC_firstprivate: 3821 case OMPC_lastprivate: 3822 case OMPC_reduction: 3823 case OMPC_task_reduction: 3824 case 
OMPC_in_reduction: 3825 case OMPC_safelen: 3826 case OMPC_simdlen: 3827 case OMPC_collapse: 3828 case OMPC_default: 3829 case OMPC_seq_cst: 3830 case OMPC_shared: 3831 case OMPC_linear: 3832 case OMPC_aligned: 3833 case OMPC_copyin: 3834 case OMPC_copyprivate: 3835 case OMPC_flush: 3836 case OMPC_proc_bind: 3837 case OMPC_schedule: 3838 case OMPC_ordered: 3839 case OMPC_nowait: 3840 case OMPC_untied: 3841 case OMPC_threadprivate: 3842 case OMPC_depend: 3843 case OMPC_mergeable: 3844 case OMPC_device: 3845 case OMPC_threads: 3846 case OMPC_simd: 3847 case OMPC_map: 3848 case OMPC_num_teams: 3849 case OMPC_thread_limit: 3850 case OMPC_priority: 3851 case OMPC_grainsize: 3852 case OMPC_nogroup: 3853 case OMPC_num_tasks: 3854 case OMPC_hint: 3855 case OMPC_dist_schedule: 3856 case OMPC_defaultmap: 3857 case OMPC_uniform: 3858 case OMPC_to: 3859 case OMPC_from: 3860 case OMPC_use_device_ptr: 3861 case OMPC_is_device_ptr: 3862 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 3863 } 3864 } 3865 3866 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 3867 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); 3868 OpenMPClauseKind Kind = OMPC_unknown; 3869 for (auto *C : S.clauses()) { 3870 // Find first clause (skip seq_cst clause, if it is first). 3871 if (C->getClauseKind() != OMPC_seq_cst) { 3872 Kind = C->getClauseKind(); 3873 break; 3874 } 3875 } 3876 3877 const auto *CS = S.getInnermostCapturedStmt()->IgnoreContainers(); 3878 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) { 3879 enterFullExpression(EWC); 3880 } 3881 // Processing for statements under 'atomic capture'. 
3882 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 3883 for (const auto *C : Compound->body()) { 3884 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) { 3885 enterFullExpression(EWC); 3886 } 3887 } 3888 } 3889 3890 auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, 3891 PrePostActionTy &) { 3892 CGF.EmitStopPoint(CS); 3893 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), 3894 S.getV(), S.getExpr(), S.getUpdateExpr(), 3895 S.isXLHSInRHSPart(), S.getLocStart()); 3896 }; 3897 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3898 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); 3899 } 3900 3901 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, 3902 const OMPExecutableDirective &S, 3903 const RegionCodeGenTy &CodeGen) { 3904 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); 3905 CodeGenModule &CGM = CGF.CGM; 3906 3907 llvm::Function *Fn = nullptr; 3908 llvm::Constant *FnID = nullptr; 3909 3910 const Expr *IfCond = nullptr; 3911 // Check for the at most one if clause associated with the target region. 3912 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3913 if (C->getNameModifier() == OMPD_unknown || 3914 C->getNameModifier() == OMPD_target) { 3915 IfCond = C->getCondition(); 3916 break; 3917 } 3918 } 3919 3920 // Check if we have any device clause associated with the directive. 3921 const Expr *Device = nullptr; 3922 if (auto *C = S.getSingleClause<OMPDeviceClause>()) { 3923 Device = C->getDevice(); 3924 } 3925 3926 // Check if we have an if clause whose conditional always evaluates to false 3927 // or if we do not have any targets specified. If so the target region is not 3928 // an offload entry point. 
3929 bool IsOffloadEntry = true; 3930 if (IfCond) { 3931 bool Val; 3932 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) 3933 IsOffloadEntry = false; 3934 } 3935 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3936 IsOffloadEntry = false; 3937 3938 assert(CGF.CurFuncDecl && "No parent declaration for target region!"); 3939 StringRef ParentName; 3940 // In case we have Ctors/Dtors we use the complete type variant to produce 3941 // the mangling of the device outlined kernel. 3942 if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) 3943 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); 3944 else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) 3945 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); 3946 else 3947 ParentName = 3948 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl))); 3949 3950 // Emit target region as a standalone region. 3951 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, 3952 IsOffloadEntry, CodeGen); 3953 OMPLexicalScope Scope(CGF, S, OMPD_task); 3954 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device); 3955 } 3956 3957 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, 3958 PrePostActionTy &Action) { 3959 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 3960 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3961 CGF.EmitOMPPrivateClause(S, PrivateScope); 3962 (void)PrivateScope.Privatize(); 3963 3964 Action.Enter(CGF); 3965 CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); 3966 } 3967 3968 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, 3969 StringRef ParentName, 3970 const OMPTargetDirective &S) { 3971 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3972 emitTargetRegion(CGF, S, Action); 3973 }; 3974 llvm::Function *Fn; 3975 llvm::Constant *Addr; 3976 // Emit target region as a standalone region. 
3977 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3978 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3979 assert(Fn && Addr && "Target device function emission failed."); 3980 } 3981 3982 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 3983 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3984 emitTargetRegion(CGF, S, Action); 3985 }; 3986 emitCommonOMPTargetDirective(*this, S, CodeGen); 3987 } 3988 3989 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, 3990 const OMPExecutableDirective &S, 3991 OpenMPDirectiveKind InnermostKind, 3992 const RegionCodeGenTy &CodeGen) { 3993 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); 3994 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( 3995 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 3996 3997 const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>(); 3998 const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>(); 3999 if (NT || TL) { 4000 Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; 4001 Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; 4002 4003 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, 4004 S.getLocStart()); 4005 } 4006 4007 OMPTeamsScope Scope(CGF, S); 4008 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 4009 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 4010 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, 4011 CapturedVars); 4012 } 4013 4014 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { 4015 // Emit teams region as a standalone region. 
4016 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4017 OMPPrivateScope PrivateScope(CGF); 4018 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4019 CGF.EmitOMPPrivateClause(S, PrivateScope); 4020 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4021 (void)PrivateScope.Privatize(); 4022 CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt()); 4023 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4024 }; 4025 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); 4026 emitPostUpdateForReductionClause( 4027 *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 4028 } 4029 4030 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, 4031 const OMPTargetTeamsDirective &S) { 4032 auto *CS = S.getCapturedStmt(OMPD_teams); 4033 Action.Enter(CGF); 4034 // Emit teams region as a standalone region. 4035 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { 4036 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4037 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4038 CGF.EmitOMPPrivateClause(S, PrivateScope); 4039 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4040 (void)PrivateScope.Privatize(); 4041 Action.Enter(CGF); 4042 CGF.EmitStmt(CS->getCapturedStmt()); 4043 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4044 }; 4045 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); 4046 emitPostUpdateForReductionClause( 4047 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 4048 } 4049 4050 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 4051 CodeGenModule &CGM, StringRef ParentName, 4052 const OMPTargetTeamsDirective &S) { 4053 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4054 emitTargetTeamsRegion(CGF, Action, S); 4055 }; 4056 llvm::Function *Fn; 4057 llvm::Constant *Addr; 4058 // Emit target region as a standalone region. 
4059 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4060 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4061 assert(Fn && Addr && "Target device function emission failed."); 4062 } 4063 4064 void CodeGenFunction::EmitOMPTargetTeamsDirective( 4065 const OMPTargetTeamsDirective &S) { 4066 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4067 emitTargetTeamsRegion(CGF, Action, S); 4068 }; 4069 emitCommonOMPTargetDirective(*this, S, CodeGen); 4070 } 4071 4072 static void 4073 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action, 4074 const OMPTargetTeamsDistributeDirective &S) { 4075 Action.Enter(CGF); 4076 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4077 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 4078 }; 4079 4080 // Emit teams region as a standalone region. 4081 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4082 PrePostActionTy &) { 4083 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4084 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4085 (void)PrivateScope.Privatize(); 4086 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 4087 CodeGenDistribute); 4088 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4089 }; 4090 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen); 4091 emitPostUpdateForReductionClause(CGF, S, 4092 [](CodeGenFunction &) { return nullptr; }); 4093 } 4094 4095 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 4096 CodeGenModule &CGM, StringRef ParentName, 4097 const OMPTargetTeamsDistributeDirective &S) { 4098 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4099 emitTargetTeamsDistributeRegion(CGF, Action, S); 4100 }; 4101 llvm::Function *Fn; 4102 llvm::Constant *Addr; 4103 // Emit target region as a standalone region. 
4104 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4105 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4106 assert(Fn && Addr && "Target device function emission failed."); 4107 } 4108 4109 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( 4110 const OMPTargetTeamsDistributeDirective &S) { 4111 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4112 emitTargetTeamsDistributeRegion(CGF, Action, S); 4113 }; 4114 emitCommonOMPTargetDirective(*this, S, CodeGen); 4115 } 4116 4117 static void emitTargetTeamsDistributeSimdRegion( 4118 CodeGenFunction &CGF, PrePostActionTy &Action, 4119 const OMPTargetTeamsDistributeSimdDirective &S) { 4120 Action.Enter(CGF); 4121 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4122 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 4123 }; 4124 4125 // Emit teams region as a standalone region. 4126 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4127 PrePostActionTy &) { 4128 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4129 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4130 (void)PrivateScope.Privatize(); 4131 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 4132 CodeGenDistribute); 4133 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4134 }; 4135 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen); 4136 emitPostUpdateForReductionClause(CGF, S, 4137 [](CodeGenFunction &) { return nullptr; }); 4138 } 4139 4140 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 4141 CodeGenModule &CGM, StringRef ParentName, 4142 const OMPTargetTeamsDistributeSimdDirective &S) { 4143 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4144 emitTargetTeamsDistributeSimdRegion(CGF, Action, S); 4145 }; 4146 llvm::Function *Fn; 4147 llvm::Constant *Addr; 4148 // Emit target region as a standalone region. 
4149 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4150 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4151 assert(Fn && Addr && "Target device function emission failed."); 4152 } 4153 4154 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( 4155 const OMPTargetTeamsDistributeSimdDirective &S) { 4156 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4157 emitTargetTeamsDistributeSimdRegion(CGF, Action, S); 4158 }; 4159 emitCommonOMPTargetDirective(*this, S, CodeGen); 4160 } 4161 4162 void CodeGenFunction::EmitOMPTeamsDistributeDirective( 4163 const OMPTeamsDistributeDirective &S) { 4164 4165 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4166 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 4167 }; 4168 4169 // Emit teams region as a standalone region. 4170 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4171 PrePostActionTy &) { 4172 OMPPrivateScope PrivateScope(CGF); 4173 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4174 (void)PrivateScope.Privatize(); 4175 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 4176 CodeGenDistribute); 4177 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4178 }; 4179 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); 4180 emitPostUpdateForReductionClause(*this, S, 4181 [](CodeGenFunction &) { return nullptr; }); 4182 } 4183 4184 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( 4185 const OMPTeamsDistributeSimdDirective &S) { 4186 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4187 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 4188 }; 4189 4190 // Emit teams region as a standalone region. 
4191 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4192 PrePostActionTy &) { 4193 OMPPrivateScope PrivateScope(CGF); 4194 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4195 (void)PrivateScope.Privatize(); 4196 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, 4197 CodeGenDistribute); 4198 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4199 }; 4200 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen); 4201 emitPostUpdateForReductionClause(*this, S, 4202 [](CodeGenFunction &) { return nullptr; }); 4203 } 4204 4205 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( 4206 const OMPTeamsDistributeParallelForDirective &S) { 4207 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4208 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 4209 S.getDistInc()); 4210 }; 4211 4212 // Emit teams region as a standalone region. 4213 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4214 PrePostActionTy &) { 4215 OMPPrivateScope PrivateScope(CGF); 4216 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4217 (void)PrivateScope.Privatize(); 4218 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 4219 CodeGenDistribute); 4220 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4221 }; 4222 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); 4223 emitPostUpdateForReductionClause(*this, S, 4224 [](CodeGenFunction &) { return nullptr; }); 4225 } 4226 4227 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( 4228 const OMPTeamsDistributeParallelForSimdDirective &S) { 4229 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4230 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 4231 S.getDistInc()); 4232 }; 4233 4234 // Emit teams region as a standalone region. 
4235 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4236 PrePostActionTy &) { 4237 OMPPrivateScope PrivateScope(CGF); 4238 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4239 (void)PrivateScope.Privatize(); 4240 CGF.CGM.getOpenMPRuntime().emitInlinedDirective( 4241 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); 4242 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4243 }; 4244 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); 4245 emitPostUpdateForReductionClause(*this, S, 4246 [](CodeGenFunction &) { return nullptr; }); 4247 } 4248 4249 static void emitTargetTeamsDistributeParallelForRegion( 4250 CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S, 4251 PrePostActionTy &Action) { 4252 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4253 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 4254 S.getDistInc()); 4255 }; 4256 4257 // Emit teams region as a standalone region. 4258 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4259 PrePostActionTy &) { 4260 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4261 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4262 (void)PrivateScope.Privatize(); 4263 CGF.CGM.getOpenMPRuntime().emitInlinedDirective( 4264 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); 4265 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4266 }; 4267 4268 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for, 4269 CodeGenTeams); 4270 emitPostUpdateForReductionClause(CGF, S, 4271 [](CodeGenFunction &) { return nullptr; }); 4272 } 4273 4274 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 4275 CodeGenModule &CGM, StringRef ParentName, 4276 const OMPTargetTeamsDistributeParallelForDirective &S) { 4277 // Emit SPMD target teams distribute parallel for region as a standalone 4278 // region. 
4279 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4280 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); 4281 }; 4282 llvm::Function *Fn; 4283 llvm::Constant *Addr; 4284 // Emit target region as a standalone region. 4285 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4286 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4287 assert(Fn && Addr && "Target device function emission failed."); 4288 } 4289 4290 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( 4291 const OMPTargetTeamsDistributeParallelForDirective &S) { 4292 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4293 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); 4294 }; 4295 emitCommonOMPTargetDirective(*this, S, CodeGen); 4296 } 4297 4298 static void emitTargetTeamsDistributeParallelForSimdRegion( 4299 CodeGenFunction &CGF, 4300 const OMPTargetTeamsDistributeParallelForSimdDirective &S, 4301 PrePostActionTy &Action) { 4302 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4303 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 4304 S.getDistInc()); 4305 }; 4306 4307 // Emit teams region as a standalone region. 
4308 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4309 PrePostActionTy &) { 4310 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4311 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4312 (void)PrivateScope.Privatize(); 4313 CGF.CGM.getOpenMPRuntime().emitInlinedDirective( 4314 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); 4315 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4316 }; 4317 4318 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd, 4319 CodeGenTeams); 4320 emitPostUpdateForReductionClause(CGF, S, 4321 [](CodeGenFunction &) { return nullptr; }); 4322 } 4323 4324 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 4325 CodeGenModule &CGM, StringRef ParentName, 4326 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 4327 // Emit SPMD target teams distribute parallel for simd region as a standalone 4328 // region. 4329 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4330 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); 4331 }; 4332 llvm::Function *Fn; 4333 llvm::Constant *Addr; 4334 // Emit target region as a standalone region. 
4335 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4336 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4337 assert(Fn && Addr && "Target device function emission failed."); 4338 } 4339 4340 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( 4341 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 4342 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4343 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); 4344 }; 4345 emitCommonOMPTargetDirective(*this, S, CodeGen); 4346 } 4347 4348 void CodeGenFunction::EmitOMPCancellationPointDirective( 4349 const OMPCancellationPointDirective &S) { 4350 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), 4351 S.getCancelRegion()); 4352 } 4353 4354 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 4355 const Expr *IfCond = nullptr; 4356 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 4357 if (C->getNameModifier() == OMPD_unknown || 4358 C->getNameModifier() == OMPD_cancel) { 4359 IfCond = C->getCondition(); 4360 break; 4361 } 4362 } 4363 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, 4364 S.getCancelRegion()); 4365 } 4366 4367 CodeGenFunction::JumpDest 4368 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 4369 if (Kind == OMPD_parallel || Kind == OMPD_task || 4370 Kind == OMPD_target_parallel) 4371 return ReturnBlock; 4372 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 4373 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || 4374 Kind == OMPD_distribute_parallel_for || 4375 Kind == OMPD_target_parallel_for || 4376 Kind == OMPD_teams_distribute_parallel_for || 4377 Kind == OMPD_target_teams_distribute_parallel_for); 4378 return OMPCancelStack.getExitBlock(); 4379 } 4380 4381 void CodeGenFunction::EmitOMPUseDevicePtrClause( 4382 const OMPClause &NC, OMPPrivateScope &PrivateScope, 4383 const 
llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { 4384 const auto &C = cast<OMPUseDevicePtrClause>(NC); 4385 auto OrigVarIt = C.varlist_begin(); 4386 auto InitIt = C.inits().begin(); 4387 for (auto PvtVarIt : C.private_copies()) { 4388 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl()); 4389 auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl()); 4390 auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl()); 4391 4392 // In order to identify the right initializer we need to match the 4393 // declaration used by the mapping logic. In some cases we may get 4394 // OMPCapturedExprDecl that refers to the original declaration. 4395 const ValueDecl *MatchingVD = OrigVD; 4396 if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) { 4397 // OMPCapturedExprDecl are used to privative fields of the current 4398 // structure. 4399 auto *ME = cast<MemberExpr>(OED->getInit()); 4400 assert(isa<CXXThisExpr>(ME->getBase()) && 4401 "Base should be the current struct!"); 4402 MatchingVD = ME->getMemberDecl(); 4403 } 4404 4405 // If we don't have information about the current list item, move on to 4406 // the next one. 4407 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD); 4408 if (InitAddrIt == CaptureDeviceAddrMap.end()) 4409 continue; 4410 4411 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 4412 // Initialize the temporary initialization variable with the address we 4413 // get from the runtime library. We have to cast the source address 4414 // because it is always a void *. References are materialized in the 4415 // privatization scope, so the initialization here disregards the fact 4416 // the original variable is a reference. 
4417 QualType AddrQTy = 4418 getContext().getPointerType(OrigVD->getType().getNonReferenceType()); 4419 llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy); 4420 Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy); 4421 setAddrOfLocalVar(InitVD, InitAddr); 4422 4423 // Emit private declaration, it will be initialized by the value we 4424 // declaration we just added to the local declarations map. 4425 EmitDecl(*PvtVD); 4426 4427 // The initialization variables reached its purpose in the emission 4428 // ofthe previous declaration, so we don't need it anymore. 4429 LocalDeclMap.erase(InitVD); 4430 4431 // Return the address of the private variable. 4432 return GetAddrOfLocalVar(PvtVD); 4433 }); 4434 assert(IsRegistered && "firstprivate var already registered as private"); 4435 // Silence the warning about unused variable. 4436 (void)IsRegistered; 4437 4438 ++OrigVarIt; 4439 ++InitIt; 4440 } 4441 } 4442 4443 // Generate the instructions for '#pragma omp target data' directive. 4444 void CodeGenFunction::EmitOMPTargetDataDirective( 4445 const OMPTargetDataDirective &S) { 4446 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true); 4447 4448 // Create a pre/post action to signal the privatization of the device pointer. 4449 // This action can be replaced by the OpenMP runtime code generation to 4450 // deactivate privatization. 
4451 bool PrivatizeDevicePointers = false; 4452 class DevicePointerPrivActionTy : public PrePostActionTy { 4453 bool &PrivatizeDevicePointers; 4454 4455 public: 4456 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers) 4457 : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {} 4458 void Enter(CodeGenFunction &CGF) override { 4459 PrivatizeDevicePointers = true; 4460 } 4461 }; 4462 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers); 4463 4464 auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers]( 4465 CodeGenFunction &CGF, PrePostActionTy &Action) { 4466 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4467 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 4468 }; 4469 4470 // Codegen that selects wheather to generate the privatization code or not. 4471 auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers, 4472 &InnermostCodeGen](CodeGenFunction &CGF, 4473 PrePostActionTy &Action) { 4474 RegionCodeGenTy RCG(InnermostCodeGen); 4475 PrivatizeDevicePointers = false; 4476 4477 // Call the pre-action to change the status of PrivatizeDevicePointers if 4478 // needed. 4479 Action.Enter(CGF); 4480 4481 if (PrivatizeDevicePointers) { 4482 OMPPrivateScope PrivateScope(CGF); 4483 // Emit all instances of the use_device_ptr clause. 4484 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) 4485 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope, 4486 Info.CaptureDeviceAddrMap); 4487 (void)PrivateScope.Privatize(); 4488 RCG(CGF); 4489 } else 4490 RCG(CGF); 4491 }; 4492 4493 // Forward the provided action to the privatization codegen. 4494 RegionCodeGenTy PrivRCG(PrivCodeGen); 4495 PrivRCG.setAction(Action); 4496 4497 // Notwithstanding the body of the region is emitted as inlined directive, 4498 // we don't use an inline scope as changes in the references inside the 4499 // region are expected to be visible outside, so we do not privative them. 
4500 OMPLexicalScope Scope(CGF, S); 4501 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, 4502 PrivRCG); 4503 }; 4504 4505 RegionCodeGenTy RCG(CodeGen); 4506 4507 // If we don't have target devices, don't bother emitting the data mapping 4508 // code. 4509 if (CGM.getLangOpts().OMPTargetTriples.empty()) { 4510 RCG(*this); 4511 return; 4512 } 4513 4514 // Check if we have any if clause associated with the directive. 4515 const Expr *IfCond = nullptr; 4516 if (auto *C = S.getSingleClause<OMPIfClause>()) 4517 IfCond = C->getCondition(); 4518 4519 // Check if we have any device clause associated with the directive. 4520 const Expr *Device = nullptr; 4521 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 4522 Device = C->getDevice(); 4523 4524 // Set the action to signal privatization of device pointers. 4525 RCG.setAction(PrivAction); 4526 4527 // Emit region code. 4528 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG, 4529 Info); 4530 } 4531 4532 void CodeGenFunction::EmitOMPTargetEnterDataDirective( 4533 const OMPTargetEnterDataDirective &S) { 4534 // If we don't have target devices, don't bother emitting the data mapping 4535 // code. 4536 if (CGM.getLangOpts().OMPTargetTriples.empty()) 4537 return; 4538 4539 // Check if we have any if clause associated with the directive. 4540 const Expr *IfCond = nullptr; 4541 if (auto *C = S.getSingleClause<OMPIfClause>()) 4542 IfCond = C->getCondition(); 4543 4544 // Check if we have any device clause associated with the directive. 
4545 const Expr *Device = nullptr; 4546 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 4547 Device = C->getDevice(); 4548 4549 OMPLexicalScope Scope(*this, S, OMPD_task); 4550 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 4551 } 4552 4553 void CodeGenFunction::EmitOMPTargetExitDataDirective( 4554 const OMPTargetExitDataDirective &S) { 4555 // If we don't have target devices, don't bother emitting the data mapping 4556 // code. 4557 if (CGM.getLangOpts().OMPTargetTriples.empty()) 4558 return; 4559 4560 // Check if we have any if clause associated with the directive. 4561 const Expr *IfCond = nullptr; 4562 if (auto *C = S.getSingleClause<OMPIfClause>()) 4563 IfCond = C->getCondition(); 4564 4565 // Check if we have any device clause associated with the directive. 4566 const Expr *Device = nullptr; 4567 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 4568 Device = C->getDevice(); 4569 4570 OMPLexicalScope Scope(*this, S, OMPD_task); 4571 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 4572 } 4573 4574 static void emitTargetParallelRegion(CodeGenFunction &CGF, 4575 const OMPTargetParallelDirective &S, 4576 PrePostActionTy &Action) { 4577 // Get the captured statement associated with the 'parallel' region. 4578 auto *CS = S.getCapturedStmt(OMPD_parallel); 4579 Action.Enter(CGF); 4580 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) { 4581 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4582 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4583 CGF.EmitOMPPrivateClause(S, PrivateScope); 4584 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4585 (void)PrivateScope.Privatize(); 4586 // TODO: Add support for clauses. 
4587 CGF.EmitStmt(CS->getCapturedStmt()); 4588 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 4589 }; 4590 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, 4591 emitEmptyBoundParameters); 4592 emitPostUpdateForReductionClause( 4593 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 4594 } 4595 4596 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 4597 CodeGenModule &CGM, StringRef ParentName, 4598 const OMPTargetParallelDirective &S) { 4599 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4600 emitTargetParallelRegion(CGF, S, Action); 4601 }; 4602 llvm::Function *Fn; 4603 llvm::Constant *Addr; 4604 // Emit target region as a standalone region. 4605 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4606 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4607 assert(Fn && Addr && "Target device function emission failed."); 4608 } 4609 4610 void CodeGenFunction::EmitOMPTargetParallelDirective( 4611 const OMPTargetParallelDirective &S) { 4612 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4613 emitTargetParallelRegion(CGF, S, Action); 4614 }; 4615 emitCommonOMPTargetDirective(*this, S, CodeGen); 4616 } 4617 4618 static void emitTargetParallelForRegion(CodeGenFunction &CGF, 4619 const OMPTargetParallelForDirective &S, 4620 PrePostActionTy &Action) { 4621 Action.Enter(CGF); 4622 // Emit directive as a combined directive that consists of two implicit 4623 // directives: 'parallel' with 'for' directive. 
4624 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4625 CodeGenFunction::OMPCancelStackRAII CancelRegion( 4626 CGF, OMPD_target_parallel_for, S.hasCancel()); 4627 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 4628 emitDispatchForLoopBounds); 4629 }; 4630 emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, 4631 emitEmptyBoundParameters); 4632 } 4633 4634 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 4635 CodeGenModule &CGM, StringRef ParentName, 4636 const OMPTargetParallelForDirective &S) { 4637 // Emit SPMD target parallel for region as a standalone region. 4638 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4639 emitTargetParallelForRegion(CGF, S, Action); 4640 }; 4641 llvm::Function *Fn; 4642 llvm::Constant *Addr; 4643 // Emit target region as a standalone region. 4644 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4645 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4646 assert(Fn && Addr && "Target device function emission failed."); 4647 } 4648 4649 void CodeGenFunction::EmitOMPTargetParallelForDirective( 4650 const OMPTargetParallelForDirective &S) { 4651 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4652 emitTargetParallelForRegion(CGF, S, Action); 4653 }; 4654 emitCommonOMPTargetDirective(*this, S, CodeGen); 4655 } 4656 4657 static void 4658 emitTargetParallelForSimdRegion(CodeGenFunction &CGF, 4659 const OMPTargetParallelForSimdDirective &S, 4660 PrePostActionTy &Action) { 4661 Action.Enter(CGF); 4662 // Emit directive as a combined directive that consists of two implicit 4663 // directives: 'parallel' with 'for' directive. 
4664 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4665 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 4666 emitDispatchForLoopBounds); 4667 }; 4668 emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen, 4669 emitEmptyBoundParameters); 4670 } 4671 4672 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 4673 CodeGenModule &CGM, StringRef ParentName, 4674 const OMPTargetParallelForSimdDirective &S) { 4675 // Emit SPMD target parallel for region as a standalone region. 4676 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4677 emitTargetParallelForSimdRegion(CGF, S, Action); 4678 }; 4679 llvm::Function *Fn; 4680 llvm::Constant *Addr; 4681 // Emit target region as a standalone region. 4682 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4683 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4684 assert(Fn && Addr && "Target device function emission failed."); 4685 } 4686 4687 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( 4688 const OMPTargetParallelForSimdDirective &S) { 4689 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4690 emitTargetParallelForSimdRegion(CGF, S, Action); 4691 }; 4692 emitCommonOMPTargetDirective(*this, S, CodeGen); 4693 } 4694 4695 /// Emit a helper variable and return corresponding lvalue. 4696 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, 4697 const ImplicitParamDecl *PVD, 4698 CodeGenFunction::OMPPrivateScope &Privates) { 4699 auto *VDecl = cast<VarDecl>(Helper->getDecl()); 4700 Privates.addPrivate( 4701 VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); }); 4702 } 4703 4704 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { 4705 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); 4706 // Emit outlined function for task construct. 
4707 const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop); 4708 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 4709 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 4710 const Expr *IfCond = nullptr; 4711 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 4712 if (C->getNameModifier() == OMPD_unknown || 4713 C->getNameModifier() == OMPD_taskloop) { 4714 IfCond = C->getCondition(); 4715 break; 4716 } 4717 } 4718 4719 OMPTaskDataTy Data; 4720 // Check if taskloop must be emitted without taskgroup. 4721 Data.Nogroup = S.getSingleClause<OMPNogroupClause>(); 4722 // TODO: Check if we should emit tied or untied task. 4723 Data.Tied = true; 4724 // Set scheduling for taskloop 4725 if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) { 4726 // grainsize clause 4727 Data.Schedule.setInt(/*IntVal=*/false); 4728 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize())); 4729 } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) { 4730 // num_tasks clause 4731 Data.Schedule.setInt(/*IntVal=*/true); 4732 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks())); 4733 } 4734 4735 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { 4736 // if (PreCond) { 4737 // for (IV in 0..LastIteration) BODY; 4738 // <Final counter/linear vars updates>; 4739 // } 4740 // 4741 4742 // Emit: if (PreCond) - begin. 4743 // If the condition constant folds and can be elided, avoid emitting the 4744 // whole loop. 
4745 bool CondConstant; 4746 llvm::BasicBlock *ContBlock = nullptr; 4747 OMPLoopScope PreInitScope(CGF, S); 4748 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 4749 if (!CondConstant) 4750 return; 4751 } else { 4752 auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); 4753 ContBlock = CGF.createBasicBlock("taskloop.if.end"); 4754 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 4755 CGF.getProfileCount(&S)); 4756 CGF.EmitBlock(ThenBlock); 4757 CGF.incrementProfileCounter(&S); 4758 } 4759 4760 if (isOpenMPSimdDirective(S.getDirectiveKind())) 4761 CGF.EmitOMPSimdInit(S); 4762 4763 OMPPrivateScope LoopScope(CGF); 4764 // Emit helper vars inits. 4765 enum { LowerBound = 5, UpperBound, Stride, LastIter }; 4766 auto *I = CS->getCapturedDecl()->param_begin(); 4767 auto *LBP = std::next(I, LowerBound); 4768 auto *UBP = std::next(I, UpperBound); 4769 auto *STP = std::next(I, Stride); 4770 auto *LIP = std::next(I, LastIter); 4771 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP, 4772 LoopScope); 4773 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP, 4774 LoopScope); 4775 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope); 4776 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, 4777 LoopScope); 4778 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 4779 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 4780 (void)LoopScope.Privatize(); 4781 // Emit the loop iteration variable. 4782 const Expr *IVExpr = S.getIterationVariable(); 4783 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 4784 CGF.EmitVarDecl(*IVDecl); 4785 CGF.EmitIgnoredExpr(S.getInit()); 4786 4787 // Emit the iterations count variable. 4788 // If it is not a variable, Sema decided to calculate iterations count on 4789 // each iteration (e.g., it is foldable into a constant). 
4790 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 4791 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 4792 // Emit calculation of the iterations count. 4793 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 4794 } 4795 4796 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 4797 S.getInc(), 4798 [&S](CodeGenFunction &CGF) { 4799 CGF.EmitOMPLoopBody(S, JumpDest()); 4800 CGF.EmitStopPoint(&S); 4801 }, 4802 [](CodeGenFunction &) {}); 4803 // Emit: if (PreCond) - end. 4804 if (ContBlock) { 4805 CGF.EmitBranch(ContBlock); 4806 CGF.EmitBlock(ContBlock, true); 4807 } 4808 // Emit final copy of the lastprivate variables if IsLastIter != 0. 4809 if (HasLastprivateClause) { 4810 CGF.EmitOMPLastprivateClauseFinal( 4811 S, isOpenMPSimdDirective(S.getDirectiveKind()), 4812 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( 4813 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 4814 (*LIP)->getType(), S.getLocStart()))); 4815 } 4816 }; 4817 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 4818 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 4819 const OMPTaskDataTy &Data) { 4820 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) { 4821 OMPLoopScope PreInitScope(CGF, S); 4822 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S, 4823 OutlinedFn, SharedsTy, 4824 CapturedStruct, IfCond, Data); 4825 }; 4826 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, 4827 CodeGen); 4828 }; 4829 if (Data.Nogroup) { 4830 EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data); 4831 } else { 4832 CGM.getOpenMPRuntime().emitTaskgroupRegion( 4833 *this, 4834 [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, 4835 PrePostActionTy &Action) { 4836 Action.Enter(CGF); 4837 CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, 4838 Data); 4839 }, 4840 S.getLocStart()); 4841 } 4842 } 4843 4844 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 4845 
EmitOMPTaskLoopBasedDirective(S); 4846 } 4847 4848 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 4849 const OMPTaskLoopSimdDirective &S) { 4850 EmitOMPTaskLoopBasedDirective(S); 4851 } 4852 4853 // Generate the instructions for '#pragma omp target update' directive. 4854 void CodeGenFunction::EmitOMPTargetUpdateDirective( 4855 const OMPTargetUpdateDirective &S) { 4856 // If we don't have target devices, don't bother emitting the data mapping 4857 // code. 4858 if (CGM.getLangOpts().OMPTargetTriples.empty()) 4859 return; 4860 4861 // Check if we have any if clause associated with the directive. 4862 const Expr *IfCond = nullptr; 4863 if (auto *C = S.getSingleClause<OMPIfClause>()) 4864 IfCond = C->getCondition(); 4865 4866 // Check if we have any device clause associated with the directive. 4867 const Expr *Device = nullptr; 4868 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 4869 Device = C->getDevice(); 4870 4871 OMPLexicalScope Scope(*this, S, OMPD_task); 4872 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 4873 } 4874 4875 void CodeGenFunction::EmitSimpleOMPExecutableDirective( 4876 const OMPExecutableDirective &D) { 4877 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) 4878 return; 4879 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { 4880 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 4881 emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action); 4882 } else { 4883 if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) { 4884 for (const auto *E : LD->counters()) { 4885 if (const auto *VD = dyn_cast<OMPCapturedExprDecl>( 4886 cast<DeclRefExpr>(E)->getDecl())) { 4887 // Emit only those that were not explicitly referenced in clauses. 
4888 if (!CGF.LocalDeclMap.count(VD)) 4889 CGF.EmitVarDecl(*VD); 4890 } 4891 } 4892 } 4893 CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt()); 4894 } 4895 }; 4896 OMPSimdLexicalScope Scope(*this, D); 4897 CGM.getOpenMPRuntime().emitInlinedDirective( 4898 *this, 4899 isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd 4900 : D.getDirectiveKind(), 4901 CodeGen); 4902 } 4903