//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                  bool AsInlined = false, bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (AsInlined) {
      if (S.hasAssociatedStmt()) {
        auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
        for (auto &C : CS->captures()) {
          if (C.capturesVariable() || C.capturesVariableByCopy()) {
            auto *VD = C.getCapturedVar();
            assert(VD == VD->getCanonicalDecl() &&
                   "Canonical decl must be captured.");
            DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                            isCapturedVar(CGF, VD) ||
                                (CGF.CapturedStmtInfo &&
                                 InlinedShareds.isGlobalVarCaptured(VD)),
                            VD->getType().getNonReferenceType(), VK_LValue,
                            SourceLocation());
            InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
              return CGF.EmitLValue(&DRE).getAddress();
            });
          }
        }
        (void)InlinedShareds.Privatize();
      }
    }
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
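/// Pre-init statements are emitted for a plain 'parallel' directive only;
/// target and loop-bound-sharing combined directives set up their captured
/// expressions elsewhere (see EmitPreInitStmt below).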
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of used expressions from the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
        for (const auto *I : PreInits->decls())
          CGF.EmitVarDecl(cast<VarDecl>(*I));
      }
    }
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

} // namespace

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV =
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), SourceLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), SourceLocation());
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references we need to return the address of the
  // reference instead of the address of the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
  }

  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType()) {
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  }
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted
    // to uintptr. This is necessary given that the runtime library is only
    // able to deal with pointers. VLA type sizes are passed to the outlined
    // function in the same way.
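    // For example, a 'float' captured by copy travels as a uintptr-typed
    // argument: the caller stores the bits through a float-typed pointer into
    // a uintptr-sized slot, and castValueFromUintptr() recovers them in the
    // outlined function by casting the argument's address back to 'float *'.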
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType()) {
      if (FO.UIntPtrCastRequired)
        ArgType = Ctx.getUIntPtrType();
    }

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &Ctx.Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType());
    auto *Arg =
        ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II,
                                  ArgType, ImplicitParamDecl::Other);
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit the function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.S->getLocStart(), CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    // Do not map arguments if we emit the function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      // If the variable is a reference, we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CGF.CreateMemTemp(
            CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
        CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
                              /*Volatile=*/false, CurVD->getType());
        LocalAddr = RefAddr;
      }
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
                                                          Args[Cnt]->getName(),
                                                          ArgLVal),
                                     FD->getType(), AlignmentSource::Decl);
      }
      auto *ExprArg =
          CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = CGF.EmitLoadOfReference(
              ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
        } else if (!VarTy->isVariablyModifiedType() ||
                   !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = CGF.EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      LocalAddrs.insert(
          {Args[Cnt],
           {Var, FO.UIntPtrCastRequired
                     ? castValueFromUintptr(CGF, FD->getType(),
                                            Args[Cnt]->getName(), ArgLVal,
                                            VarTy->isReferenceType())
                     : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
                         .getScalarVal();
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
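  // When full debug info is requested, the statement body is emitted into a
  // "<helper>_debug__" function that keeps the original parameter types, and
  // the helper itself becomes a thin wrapper that performs the uintptr casts
  // and forwards its arguments to the debug function.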
  bool NeedWrapperFunction =
      getDebugInfo() &&
      CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
      LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
      VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str());
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      setAddrOfLocalVar(LocalAddrPair.second.first,
                        LocalAddrPair.second.second);
    }
  }
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName());
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end())
        CallArg = EI->second.second;
      else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
                                                  F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
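  // DestEnd points one past the last destination element; the loop below
  // advances both element cursors until the destination cursor reaches it.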
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element-by-element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // We are working with a single array element, so we have to remap
            // the destination and source variables to the corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap the pseudo source variable to the private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *CapFD = CapturesInfo.lookup(OrigVD);
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for the single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit the private VarDecl with copy init and remap the temporary
            // VDInit variable to the address of the original variable (for
            // proper handling of captured global variables).
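            // VDInit stays mapped only for the duration of EmitDecl and is
            // erased from LocalDeclMap right after, so the temporary mapping
            // cannot leak into the enclosing scope.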
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
          // Emit the private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread.
          // If it is, there is no need to copy the data.
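          // On the master thread the threadprivate copy and the master copy
          // share one address, so a pointer comparison is enough to detect it.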
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of the copying procedure for the non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    // Taskloops do not require additional initialization; it is done by the
    // runtime support library.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the
      // end of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [&]() -> Address {
                // Emit the private VarDecl with copy init.
                EmitDecl(*VD);
                return GetAddrOfLocalVar(VD);
              });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const auto *IRef : Shareds) {
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit the private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    auto Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    if (isa<OMPArraySectionExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
        return GetAddrOfLocalVar(PrivateVD);
      });
    } else if (isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
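      // The RHS implicit variable is bound to the private copy, bitcast below
      // to the element type expected by the reduction operation.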
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(
          LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      D.getDirectiveKind() == OMPD_simd;
    bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
    // Emit a nowait reduction if the nowait clause is present or the directive
    // is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (auto *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit the
          // conditional block if it was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to the
/// outlined parallel function. This is necessary for combined constructs such
/// as 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace

static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit the parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize the threads and avoid data
      // races when propagating the master thread's values of threadprivate
      // variables to the local instances of those variables in all other
      // implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values on the current iteration.
  for (auto I : D.updates())
    EmitIgnoredExpr(I);
  // Update the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *U : C->updates())
      EmitIgnoredExpr(U);
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit the loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit the condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
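  // When an initializer is a direct reference to another variable, a fresh
  // DeclRefExpr is emitted below so the referenced variable is resolved
  // through the current captured region.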
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      HasLinears = true;
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}

void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit the
          // conditional block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
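        // Query the target's default SIMD alignment for the pointee type and
        // convert it from bits to a byte count.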
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    (void)LoopScope.addPrivate(VD, [&]() -> Address {
      // Emit the var without initialization.
      if (!LocalDeclMap.count(PrivateVD)) {
        auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
        EmitAutoVarCleanups(VarEmission);
      }
      DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      (*I)->getType(), VK_LValue, (*I)->getExprLoc());
      return EmitLValue(&DRE).getAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    }
    ++I;
  }
}

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get the initial values of the real counters.
    for (auto I : S.inits())
      CGF.EmitIgnoredExpr(I);
  }
  // Check that the loop is executed at least once.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
          // Emit the private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else
        EmitVarDecl(*PrivateVD);
      ++CurPrivate;
    }
  }
}

static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(false);
  }
}

void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}

void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit the
          // conditional block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED)
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      else {
        DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPLoopScope PreInitScope(CGF, S);
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    (void)CGF.EmitOMPLinearClauseInit(S);
    {
      OMPPrivateScope LoopScope(CGF);
      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
      CGF.EmitOMPLinearClause(S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      bool HasLastprivateClause =
          CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      CGF.EmitOMPSimdFinal(
          S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
      // Emit final copy of the lastprivate variables at the end of loops.
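      // For illustration: given '#pragma omp simd lastprivate(x)', the value
      // the private copy of 'x' holds after the last iteration is copied back
      // to the original 'x' below; NoFinals is true because the counter
      // finals were already emitted by EmitOMPSimdFinal above.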
      if (HasLastprivateClause)
        CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
      CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
      emitPostUpdateForReductionClause(
          CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    }
    CGF.EmitOMPLinearClauseFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  auto &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    BoolCondVal =
        RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without an ordered clause.
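  // Rough sketch of the effect: for non-simd directives with a non-monotonic
  // schedule, marking the loop parallel lets LoopInfoStack attach
  // llvm.mem.parallel_loop_access metadata to the body's loads and stores,
  // licensing later passes to ignore assumed loop-carried dependences.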
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();

  // when 'distribute' is not combined with a 'for':
  // while (idx <= UB) { BODY; ++idx; }
  // when 'distribute' is combined with a 'for'
  // (e.g. 'distribute parallel for')
  // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
  EmitOMPInnerLoop(
      S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
      [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
        CodeGenLoop(CGF, S, LoopExit);
      },
      [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
        CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}

void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered =
      Ordered || RT.isDynamic(ScheduleKind.Schedule);

  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
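  //
  // Illustrative numbers (not from the spec): with schedule(guided) over 100
  // iterations on 4 threads, successive chunks might be sized roughly 25, 19,
  // 14, 11, ... decreasing toward 1 as the remaining iterations shrink.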
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the
  // threads in the team in a round-robin fashion in the order of the thread
  // number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
                                                              LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DispatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);
  }

  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}

static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}

void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {

  auto &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
  // dynamic.
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);

  // For combined 'distribute' and 'for' directives, the increment expression
  // of distribute is stored in DistInc. For 'distribute' alone, it is in Inc.
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();

  // This routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive.
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}

/// Emit a helper variable and return the corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into the current ones.
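  // Illustrative sketch (assumed shapes, not verbatim output): for
  //   #pragma omp distribute parallel for
  // each team receives a chunk [PrevLB, PrevUB] from the 'distribute'
  // schedule; the inner 'for' worksharing-schedules only that chunk among the
  // team's threads by seeding its LB/UB helpers from PrevLB/PrevUB below.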
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(), SourceLocation());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(), SourceLocation());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}

/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
/// we need to use the LB and UB expressions generated by the worksharing
/// code generation support, whereas in non-combined situations we would
/// just emit 0 and the LastIteration expression.
/// This function is necessary due to the difference of the LB and UB
/// types for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // When implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
  // is not normalized as each team only executes its own assigned
  // distribute chunk.
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
                                            SourceLocation());
  llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
                                            SourceLocation());
  return {LBVal, UBVal};
}

static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  auto LBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));

  auto UBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}

static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &) {
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S, OMPD_for, CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}

void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for,
                                  /*HasCancel=*/false);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
                                              /*HasCancel=*/false);
}

void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_distribute_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_distribute_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_teams_distribute_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_teams_distribute_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_teams_distribute_parallel_for,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams_distribute,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams_distribute_parallel_for,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams_distribute_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams_distribute_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace

bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
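    // E.g., for 'for (int i = 0; i < 0; ++i)' the precondition folds to
    // false and no loop code is emitted at all.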
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    bool Ordered = false;
    if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S);
      else
        Ordered = true;
    }

    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      if (RT.isStaticNonchunked(ScheduleKind.Schedule,
                                /* Chunked */ Chunk != nullptr) &&
          !Ordered) {
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided
        // into chunks that are approximately equal in size, and at most one
        // chunk is distributed to each thread. Note that the size of the
        // chunks is unspecified in this case.
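        // Sketch of the static non-chunked lowering (assumed runtime entry
        // for a 32-bit signed IV): a single call such as
        //   __kmpc_for_static_init_4(&loc, tid, /*kmp_sch_static*/ 34,
        //                            &IL, &LB, &UB, &ST, 1, 1);
        // computes this thread's [LB, UB] once, so no outer dispatch loop is
        // needed; emitForStaticInit below abstracts over size/signedness.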
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
            UB.getAddress(), ST.getAddress());
        RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
                             ScheduleKind, StaticInit);
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                         S.getDirectiveKind());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        const bool IsMonotonic =
            Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
            ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
            ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
                                             ST.getAddress(), IL.getAddress(),
                                             Chunk, EUB);
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S,
                         [&](CodeGenFunction &CGF) -> llvm::Value * {
                           return CGF.Builder.CreateIsNotNull(
                               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
                         });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getLocStart()));
    });
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
  return HasLastprivateClause;
}

/// The following two functions generate expressions for the loop lower
/// and upper bounds in case of static and dynamic (dispatch) schedule
/// of the associated 'for' or 'distribute' loop.
static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  return {LB, UB};
}

/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
/// consider the lower and upper bound expressions generated by the
/// worksharing loop support, but we use 0 and the iteration space size as
/// constants.
static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                          Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
  return {LBVal, UBVal};
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
  return LVal;
}

void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
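    // Illustrative sketch of the lowering below: a 'sections' region with N
    // 'section' blocks becomes a static worksharing loop over a synthetic
    // 32-bit counter IV in [0, N-1] whose body switches on IV, so e.g. two
    // sections A(); and B(); are emitted roughly as
    //   for each assigned IV: switch (IV) { case 0: A(); break;
    //                                       case 1: B(); break; }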
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of
      // lastprivate variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
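  // E.g., '#pragma omp sections nowait' suppresses this barrier; otherwise
  // all threads of the team synchronize here before leaving the region.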
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_sections);
  }
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
                                              S.hasCancel());
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination>=<source> expressions).
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid a data race on firstprivate
  // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getLocStart(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  Expr *Hint = nullptr;
  if (auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getLocStart(), Hint);
}

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitSections(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
                                                const RegionCodeGenTy &BodyGen,
                                                const TaskGenTy &TaskGen,
                                                OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  auto *TaskT = std::next(I, 4);
  // Check if the task is final.
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
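    // E.g., '#pragma omp task final(1)' folds to a constant, so the task is
    // marked final with no runtime check; a non-constant expression is
    // evaluated to a boolean below instead.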
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has a 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    auto *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied tasks).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get the list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get the list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get the list of lastprivate variables (for taskloops).
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Data.ReductionVars.emplace_back(Ref);
      Data.ReductionCopies.emplace_back(*IPriv);
      Data.ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getLocStart(), LHSs, RHSs, Data);
  // Build the list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (auto *IRef : C->varlists())
      Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      auto *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
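      // Protocol sketch: the generated copy function receives the privates
      // block plus one pointer-to-pointer out-argument per private variable;
      // after the call, each *.ptr.addr temporary created below holds the
      // address of the corresponding copy inside the task's privates area.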
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (auto *E : Data.PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.LastprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
                                                          CopyFn, CallArgs);
      for (auto &&Pair : LastprivateDstsOrigs) {
        auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    if (Data.Reductions) {
      OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
                             Data.ReductionOps);
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement =
            Address(CGF.EmitScalarConversion(
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                        CGF.getContext().getPointerType(
                            Data.ReductionCopies[Cnt]->getType()),
                        SourceLocation()),
                    Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                         [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
                                                           RedCG, Cnt);
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const auto *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate
        // and privatized already during processing of the firstprivates.
        llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
            CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                SourceLocation()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
                              [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
                                                           RedCG, Cnt);
      }
    }
    (void)InRedScope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
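  // A task is tied unless the directive carries an 'untied' clause; e.g.
  // (illustrative user code):
  //   #pragma omp task untied
  // produces a task that may be resumed by a different thread after a task
  // scheduling point.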
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        auto IPriv = C->privates().begin();
        auto IRed = C->reduction_ops().begin();
        auto ILHS = C->lhs_exprs().begin();
        auto IRHS = C->rhs_exprs().begin();
        for (const auto *Ref : C->varlists()) {
          Data.ReductionVars.emplace_back(Ref);
          Data.ReductionCopies.emplace_back(*IPriv);
          Data.ReductionOps.emplace_back(*IRed);
          LHSs.emplace_back(*ILHS);
          RHSs.emplace_back(*IRHS);
          std::advance(IPriv, 1);
          std::advance(IRed, 1);
          std::advance(ILHS, 1);
          std::advance(IRHS, 1);
        }
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(
              CGF, S.getLocStart(), LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
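  // As a sketch, for a canonical loop like 'for (i = LB; i < UB; i += Step)'
  // the precomputed count corresponds to ceil((UB - LB) / Step); the exact
  // expression is built by Sema and only re-emitted here.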
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits.

      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
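      // e.g. (illustrative user code):
      //   #pragma omp distribute dist_schedule(static, 16)
      // takes the chunked outer-loop path below, while a plain
      //   #pragma omp distribute
      // is static non-chunked and each team receives at most one chunk.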
      if (RT.isStaticNonchunked(ScheduleKind,
                                /*Chunked=*/Chunk != nullptr)) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /*Ordered=*/false, IL.getAddress(),
            LB.getAddress(), UB.getAddress(), ST.getAddress());
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                                    StaticInit);
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                         ? S.getCombinedCond()
                         : S.getCond();

        // For 'distribute' alone, codegen
        //   while (idx <= UB) { BODY; ++idx; }
        // When combined with 'for' (e.g. as in 'distribute parallel for'),
        //   while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
                         [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                           CodeGenLoop(CGF, S, LoopExit);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }

      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}

void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
                                              false);
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
  Fn->addFnAttr(llvm::Attribute::NoInline);
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (!S.getAssociatedStmt()) {
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    if (C) {
      auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
}

static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
                                        Loc)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
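    // e.g. converting the scalar 2.0 to '_Complex double' yields the pair
    // (2.0, 0.0): the converted scalar becomes the real part and the
    // imaginary part is zeroed.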
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                              DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  } else {
    CGF.EmitAtomicStore(RVal, LVal,
                        IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                 : llvm::AtomicOrdering::Monotonic,
                        LVal.isVolatile(), /*IsInit=*/false);
  }
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(
                         XLValue, Loc,
                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                  : llvm::AtomicOrdering::Monotonic,
                         XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
}

static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
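  // A seq_cst atomic write, e.g. '#pragma omp atomic write seq_cst' applied
  // to 'x = expr;', therefore also emits a flush without a list: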
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for
  // 'x' expression is simple and atomic is allowed for the given type for the
  // target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; -> expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
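        // atomicrmw returns the value 'x' held *before* the operation, so
        // for a prefix capture the new value is recomputed by re-evaluating
        // the update expression with the returned old value substituted for
        // 'x'.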
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find the first clause (skip the seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
    enterFullExpression(EWC);
  }
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const auto *C : Compound->body()) {
      if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
        enterFullExpression(EWC);
      }
    }
  }

  auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.EmitStopPoint(CS);
    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
                      S.getV(), S.getExpr(), S.getUpdateExpr(),
                      S.isXLHSInRHSPart(), S.getLocStart());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;
  const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());

  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the at-most-one 'if' clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
    Device = C->getDevice();
  }

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is
  // not an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
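  // e.g. (illustrative user code):
  //   #pragma omp target if(target: N > 64) device(Dev)
  // is emitted as an offload entry point unless the 'if' condition constant
  // folds to false or no offload target triples are configured.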
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        CapturedVars);
}

static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();

  Action.Enter(CGF);
  CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
  const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
    Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getLocStart());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
                                           CapturedVars);
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
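  // e.g. (illustrative user code):
  //   #pragma omp teams num_teams(8) thread_limit(64)
  // lowers, via emitCommonOMPTeamsDirective, to an outlined teams function
  // plus a runtime call registering the num_teams/thread_limit bounds.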
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

static void emitTargetTeamsRegion(CodeGenFunction &CGF,
                                  PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
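  // e.g. for '#pragma omp teams distribute', the teams part is outlined
  // while the distribute loop is emitted inline within it through
  // CodeGenDistribute above.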
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for);
  return OMPCancelStack.getExitBlock();
}

void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (auto PvtVarIt : C.private_copies()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get an
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // that the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it is initialized by the initialization
      // variable we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable served its purpose in the emission of the
      // previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}

// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code
  // generation to deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers
      // if needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else
        RCG(CGF);
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inline scope, because changes to the references inside the
    // region are expected to be visible outside, so we do not privatize them.
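    // e.g. (illustrative user code; 'launch' is a placeholder):
    //   #pragma omp target data map(tofrom: A[0:N]) use_device_ptr(A)
    //   launch(A);  // within the region, 'A' holds the device address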
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  auto *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(
      CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_parallel_for,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

/// Map an OpenMP loop helper variable to the corresponding implicit parameter
/// of the outlined function in the given privatization scope.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(
      VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
}

void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
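  // Tasks generated for a taskloop are currently always tied; e.g.
  // '#pragma omp taskloop' behaves as if each generated task were a
  // '#pragma omp task' without an 'untied' clause.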
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
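    // e.g. for '#pragma omp taskloop lastprivate(x)', the task that runs the
    // logically last iteration sees IsLastIter != 0 and copies its private
    // 'x' back to the original variable.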
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup)
    EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
  else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
        },
        S.getLocStart());
  }
}

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}