//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                  bool AsInlined = false, bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (AsInlined) {
      if (S.hasAssociatedStmt()) {
        auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
        for (auto &C : CS->captures()) {
          if (C.capturesVariable() || C.capturesVariableByCopy()) {
            auto *VD = C.getCapturedVar();
            DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                            isCapturedVar(CGF, VD) ||
                                (CGF.CapturedStmtInfo &&
                                 InlinedShareds.isGlobalVarCaptured(VD)),
                            VD->getType().getNonReferenceType(), VK_LValue,
                            SourceLocation());
            InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
              return CGF.EmitLValue(&DRE).getAddress();
            });
          }
        }
        (void)InlinedShareds.Privatize();
      }
    }
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of expressions used in the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    if (auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

} // namespace

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV =
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), SourceLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
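        // (The round trip through the uintptr-typed temporary reinterprets
        // the bits, so e.g. a captured 'float' travels to the runtime as a
        // pointer-sized integer; hypothetical example for illustration.)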
        CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), SourceLocation());
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references we need to return the address of the
  // reference instead of the reference of the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
  }

  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType()) {
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  }
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  StringRef FunctionName;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly,
                           StringRef FunctionName)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName) {}
};
} // namespace

static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
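  // A sketch of the result (hypothetical example): if the region captures an
  // 'int X' by copy, a 'float &R' by reference, and a VLA, then with
  // UIntPtrCastRequired the outlined function gets roughly the signature
  //   void .outlined.(uintptr_t x, float *r, uintptr_t vla, uintptr_t n)
  // i.e., non-pointer by-copy captures and VLA sizes travel as uintptr_t.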
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  bool HasUIntPtrArgs = false;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary because the runtime library can only deal
    // with pointers. VLA type sizes are passed to the outlined function in the
    // same way.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType()) {
      HasUIntPtrArgs = true;
      if (FO.UIntPtrCastRequired)
        ArgType = Ctx.getUIntPtrType();
    }

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &Ctx.Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType());
    Args.push_back(ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr,
                                             FD->getLocation(), II, ArgType,
                                             ImplicitParamDecl::Other));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                    CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      Address LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
      // If the variable is a reference we need to materialize it here.
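      // (The incoming argument holds the pointee's address directly, so a
      // temporary is created to store that pointer, giving the function body
      // an address for the reference variable itself.)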
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CGF.CreateMemTemp(
            CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
        CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
                              /*Volatile=*/false, CurVD->getType());
        LocalAddr = RefAddr;
      }
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
    LValue ArgLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(Args[Cnt]),
                                        Args[Cnt]->getType(), BaseInfo);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
                                                          Args[Cnt]->getName(),
                                                          ArgLVal),
                                     FD->getType(), BaseInfo);
      }
      auto *ExprArg =
          CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = CGF.EmitLoadOfReference(
              ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
        } else if (!VarTy->isVariablyModifiedType() ||
                   !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = CGF.EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      LocalAddrs.insert(
          {Args[Cnt],
           {Var,
            FO.UIntPtrCastRequired
                ? castValueFromUintptr(CGF, FD->getType(),
                                       Args[Cnt]->getName(), ArgLVal,
                                       VarTy->isReferenceType())
                : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
                         .getScalarVal();
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return {F, HasUIntPtrArgs};
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
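  // Debug-info scheme (a sketch of what follows): with full debug info, the
  // body is emitted with natural parameter types for better debugging, and a
  // separate ".nondebug_wrapper." function accepts the uintptr-cast arguments
  // expected by the runtime and forwards them to the debug-friendly version.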
  bool NeedWrapperFunction =
      getDebugInfo() &&
      CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
  FunctionArgList Args;
  llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     CapturedStmtInfo->getHelperName());
  llvm::Function *F;
  bool HasUIntPtrArgs;
  std::tie(F, HasUIntPtrArgs) = emitOutlinedFunctionPrologue(
      *this, Args, LocalAddrs, VLASizes, CXXThisValue, FO);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      setAddrOfLocalVar(LocalAddrPair.second.first,
                        LocalAddrPair.second.second);
    }
  }
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction || !HasUIntPtrArgs)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            ".nondebug_wrapper.");
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.disableDebugInfo();
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO).first;
  LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV =
          WrapperCGF.MakeAddrLValue(I->second.second, Arg->getType(), BaseInfo);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end())
        CallArg = EI->second.second;
      else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(), BaseInfo);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
      }
    }
    CallArgs.emplace_back(CallArg);
  }
  WrapperCGF.Builder.CreateCall(F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
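  // Roughly (sketch of the emitted control flow):
  //   if (DestBegin == DestEnd) goto omp.arraycpy.done;
  //   omp.arraycpy.body: copy one element, advance both PHIs,
  //                      loop until DestEnd is reached;
  //   omp.arraycpy.done: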
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
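    // (Copy is the assignment expression Sema built over the pseudo
    // source/destination variables, so privatizing them above redirects the
    // copy to the real addresses.)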
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *CapFD = CapturesInfo.lookup(OrigVD);
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First, check if the current thread is the master thread. If it
          // is, there is no need to copy data.
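          // Generated check (sketch):
          //   if ((uintptr_t)&master_var != (uintptr_t)&threadprivate_var)
          //     goto copyin.not.master;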
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the
      // end of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [&]() -> Address {
                // Emit private VarDecl with copy init.
                EmitDecl(*VD);
                return GetAddrOfLocalVar(VD);
              });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
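        // (For a by-reference private copy, the stored pointer is loaded
        // below so the copy reads from the pointee rather than the reference
        // slot itself.)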
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const auto *IRef : Shareds) {
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    auto Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    if (isa<OMPArraySectionExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
        return GetAddrOfLocalVar(PrivateVD);
      });
    } else if (isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(
          LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      D.getDirectiveKind() == OMPD_simd;
    bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (auto *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace

static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // on propagation of the master thread's values of threadprivate
      // variables to the local instances of those variables in all other
      // implicit threads.
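      // (That is, a forced, simple __kmpc_barrier call with no cancellation
      // checks, as selected by the flags below.)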
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counter values on the current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *U : C->updates())
      EmitIgnoredExpr(U);
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return;
  // Emit inits for the linear variables.
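  // For a clause such as 'linear(x : step)' (hypothetical example), the
  // private copy is either emitted with its own initializer or, when the init
  // refers to the captured original variable, initialized from that
  // original's current value.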
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
}

void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
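        // (e.g., the target's preferred vector alignment, such as 16 or 32
        // bytes on common x86 SIMD ISAs; illustrative values only.)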
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    (void)LoopScope.addPrivate(VD, [&]() -> Address {
      // Emit var without initialization.
      if (!LocalDeclMap.count(PrivateVD)) {
        auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
        EmitAutoVarCleanups(VarEmission);
      }
      DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      (*I)->getType(), VK_LValue, (*I)->getExprLoc());
      return EmitLValue(&DRE).getAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    }
    ++I;
  }
}

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (auto I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Check that the loop is executed at least once.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else
        EmitVarDecl(*PrivateVD);
      ++CurPrivate;
    }
  }
}

static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(false);
  }
}

void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process simdlen/safelen.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}

void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
1453 auto *ThenBB = createBasicBlock(".omp.final.then"); 1454 DoneBB = createBasicBlock(".omp.final.done"); 1455 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1456 EmitBlock(ThenBB); 1457 } 1458 } 1459 Address OrigAddr = Address::invalid(); 1460 if (CED) 1461 OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); 1462 else { 1463 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), 1464 /*RefersToEnclosingVariableOrCapture=*/false, 1465 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); 1466 OrigAddr = EmitLValue(&DRE).getAddress(); 1467 } 1468 OMPPrivateScope VarScope(*this); 1469 VarScope.addPrivate(OrigVD, 1470 [OrigAddr]() -> Address { return OrigAddr; }); 1471 (void)VarScope.Privatize(); 1472 EmitIgnoredExpr(F); 1473 } 1474 ++IC; 1475 ++IPC; 1476 } 1477 if (DoneBB) 1478 EmitBlock(DoneBB, /*IsFinished=*/true); 1479 } 1480 1481 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, 1482 const OMPLoopDirective &S, 1483 CodeGenFunction::JumpDest LoopExit) { 1484 CGF.EmitOMPLoopBody(S, LoopExit); 1485 CGF.EmitStopPoint(&S); 1486 } 1487 1488 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 1489 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1490 OMPLoopScope PreInitScope(CGF, S); 1491 // if (PreCond) { 1492 // for (IV in 0..LastIteration) BODY; 1493 // <Final counter/linear vars updates>; 1494 // } 1495 // 1496 1497 // Emit: if (PreCond) - begin. 1498 // If the condition constant folds and can be elided, avoid emitting the 1499 // whole loop. 1500 bool CondConstant; 1501 llvm::BasicBlock *ContBlock = nullptr; 1502 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1503 if (!CondConstant) 1504 return; 1505 } else { 1506 auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); 1507 ContBlock = CGF.createBasicBlock("simd.if.end"); 1508 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 1509 CGF.getProfileCount(&S)); 1510 CGF.EmitBlock(ThenBlock); 1511 CGF.incrementProfileCounter(&S); 1512 } 1513 1514 // Emit the loop iteration variable. 1515 const Expr *IVExpr = S.getIterationVariable(); 1516 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 1517 CGF.EmitVarDecl(*IVDecl); 1518 CGF.EmitIgnoredExpr(S.getInit()); 1519 1520 // Emit the iterations count variable. 1521 // If it is not a variable, Sema decided to calculate iterations count on 1522 // each iteration (e.g., it is foldable into a constant). 1523 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1524 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1525 // Emit calculation of the iterations count. 1526 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 1527 } 1528 1529 CGF.EmitOMPSimdInit(S); 1530 1531 emitAlignedClause(CGF, S); 1532 CGF.EmitOMPLinearClauseInit(S); 1533 { 1534 OMPPrivateScope LoopScope(CGF); 1535 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 1536 CGF.EmitOMPLinearClause(S, LoopScope); 1537 CGF.EmitOMPPrivateClause(S, LoopScope); 1538 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1539 bool HasLastprivateClause = 1540 CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1541 (void)LoopScope.Privatize(); 1542 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1543 S.getInc(), 1544 [&S](CodeGenFunction &CGF) { 1545 CGF.EmitOMPLoopBody(S, JumpDest()); 1546 CGF.EmitStopPoint(&S); 1547 }, 1548 [](CodeGenFunction &) {}); 1549 CGF.EmitOMPSimdFinal( 1550 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1551 // Emit final copy of the lastprivate variables at the end of loops. 
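      // NoFinals=true below: EmitOMPSimdFinal above has already emitted the
      // final values of the loop counters, so only the lastprivate copies
      // themselves remain to be emitted.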
1552 if (HasLastprivateClause) 1553 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 1554 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); 1555 emitPostUpdateForReductionClause( 1556 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1557 } 1558 CGF.EmitOMPLinearClauseFinal( 1559 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1560 // Emit: if (PreCond) - end. 1561 if (ContBlock) { 1562 CGF.EmitBranch(ContBlock); 1563 CGF.EmitBlock(ContBlock, true); 1564 } 1565 }; 1566 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1567 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1568 } 1569 1570 void CodeGenFunction::EmitOMPOuterLoop( 1571 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 1572 CodeGenFunction::OMPPrivateScope &LoopScope, 1573 const CodeGenFunction::OMPLoopArguments &LoopArgs, 1574 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 1575 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { 1576 auto &RT = CGM.getOpenMPRuntime(); 1577 1578 const Expr *IVExpr = S.getIterationVariable(); 1579 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1580 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1581 1582 auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 1583 1584 // Start the loop with a block that tests the condition. 1585 auto CondBlock = createBasicBlock("omp.dispatch.cond"); 1586 EmitBlock(CondBlock); 1587 const SourceRange &R = S.getSourceRange(); 1588 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1589 SourceLocToDebugLoc(R.getEnd())); 1590 1591 llvm::Value *BoolCondVal = nullptr; 1592 if (!DynamicOrOrdered) { 1593 // UB = min(UB, GlobalUB) or 1594 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 1595 // 'distribute parallel for') 1596 EmitIgnoredExpr(LoopArgs.EUB); 1597 // IV = LB 1598 EmitIgnoredExpr(LoopArgs.Init); 1599 // IV < UB 1600 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); 1601 } else { 1602 BoolCondVal = 1603 RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL, 1604 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); 1605 } 1606 1607 // If there are any cleanups between here and the loop-exit scope, 1608 // create a block to stage a loop exit along. 1609 auto ExitBlock = LoopExit.getBlock(); 1610 if (LoopScope.requiresCleanups()) 1611 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 1612 1613 auto LoopBody = createBasicBlock("omp.dispatch.body"); 1614 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 1615 if (ExitBlock != LoopExit.getBlock()) { 1616 EmitBlock(ExitBlock); 1617 EmitBranchThroughCleanup(LoopExit); 1618 } 1619 EmitBlock(LoopBody); 1620 1621 // Emit "IV = LB" (in case of static schedule, we have already calculated new 1622 // LB for loop condition and emitted it above). 1623 if (DynamicOrOrdered) 1624 EmitIgnoredExpr(LoopArgs.Init); 1625 1626 // Create a block for the increment. 1627 auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 1628 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1629 1630 // Generate !llvm.loop.parallel metadata for loads and stores for loops 1631 // with dynamic/guided scheduling and without ordered clause. 
1632 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 1633 LoopStack.setParallel(!IsMonotonic); 1634 else 1635 EmitOMPSimdInit(S, IsMonotonic); 1636 1637 SourceLocation Loc = S.getLocStart(); 1638 1639 // when 'distribute' is not combined with a 'for': 1640 // while (idx <= UB) { BODY; ++idx; } 1641 // when 'distribute' is combined with a 'for' 1642 // (e.g. 'distribute parallel for') 1643 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 1644 EmitOMPInnerLoop( 1645 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, 1646 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 1647 CodeGenLoop(CGF, S, LoopExit); 1648 }, 1649 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { 1650 CodeGenOrdered(CGF, Loc, IVSize, IVSigned); 1651 }); 1652 1653 EmitBlock(Continue.getBlock()); 1654 BreakContinueStack.pop_back(); 1655 if (!DynamicOrOrdered) { 1656 // Emit "LB = LB + Stride", "UB = UB + Stride". 1657 EmitIgnoredExpr(LoopArgs.NextLB); 1658 EmitIgnoredExpr(LoopArgs.NextUB); 1659 } 1660 1661 EmitBranch(CondBlock); 1662 LoopStack.pop(); 1663 // Emit the fall-through block. 1664 EmitBlock(LoopExit.getBlock()); 1665 1666 // Tell the runtime we are done. 1667 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { 1668 if (!DynamicOrOrdered) 1669 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 1670 }; 1671 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 1672 } 1673 1674 void CodeGenFunction::EmitOMPForOuterLoop( 1675 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 1676 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1677 const OMPLoopArguments &LoopArgs, 1678 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 1679 auto &RT = CGM.getOpenMPRuntime(); 1680 1681 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 1682 const bool DynamicOrOrdered = 1683 Ordered || RT.isDynamic(ScheduleKind.Schedule); 1684 1685 assert((Ordered || 1686 !RT.isStaticNonchunked(ScheduleKind.Schedule, 1687 LoopArgs.Chunk != nullptr)) && 1688 "static non-chunked schedule does not need outer loop"); 1689 1690 // Emit outer loop. 1691 // 1692 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1693 // When schedule(dynamic,chunk_size) is specified, the iterations are 1694 // distributed to threads in the team in chunks as the threads request them. 1695 // Each thread executes a chunk of iterations, then requests another chunk, 1696 // until no chunks remain to be distributed. Each chunk contains chunk_size 1697 // iterations, except for the last chunk to be distributed, which may have 1698 // fewer iterations. When no chunk_size is specified, it defaults to 1. 1699 // 1700 // When schedule(guided,chunk_size) is specified, the iterations are assigned 1701 // to threads in the team in chunks as the executing threads request them. 1702 // Each thread executes a chunk of iterations, then requests another chunk, 1703 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 1704 // each chunk is proportional to the number of unassigned iterations divided 1705 // by the number of threads in the team, decreasing to 1. For a chunk_size 1706 // with value k (greater than 1), the size of each chunk is determined in the 1707 // same way, with the restriction that the chunks do not contain fewer than k 1708 // iterations (except for the last chunk to be assigned, which may have fewer 1709 // than k iterations). 
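  //
  // For illustration (an example, not part of the quoted spec text): with
  //   #pragma omp for schedule(dynamic, 4)
  //   for (int i = 0; i < n; ++i) body(i);
  // each thread repeatedly requests the next unassigned chunk of four
  // iterations from the runtime until no chunks remain.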
1710   //
1711   // When schedule(auto) is specified, the decision regarding scheduling is
1712   // delegated to the compiler and/or runtime system. The programmer gives the
1713   // implementation the freedom to choose any possible mapping of iterations to
1714   // threads in the team.
1715   //
1716   // When schedule(runtime) is specified, the decision regarding scheduling is
1717   // deferred until run time, and the schedule and chunk size are taken from the
1718   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1719   // implementation defined.
1720   //
1721   // while(__kmpc_dispatch_next(&LB, &UB)) {
1722   //   idx = LB;
1723   //   while (idx <= UB) { BODY; ++idx;
1724   //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1725   //   } // inner loop
1726   // }
1727   //
1728   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1729   // When schedule(static, chunk_size) is specified, iterations are divided into
1730   // chunks of size chunk_size, and the chunks are assigned to the threads in
1731   // the team in a round-robin fashion in the order of the thread number.
1732   //
1733   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1734   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1735   //   LB = LB + ST;
1736   //   UB = UB + ST;
1737   // }
1738   //
1739
1740   const Expr *IVExpr = S.getIterationVariable();
1741   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1742   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1743
1744   if (DynamicOrOrdered) {
1745     auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1746     llvm::Value *LBVal = DispatchBounds.first;
1747     llvm::Value *UBVal = DispatchBounds.second;
1748     CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
1749                                                               LoopArgs.Chunk};
1750     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1751                            IVSigned, Ordered, DispatchRTInputValues);
1752   } else {
1753     RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
1754                          Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1755                          LoopArgs.ST, LoopArgs.Chunk);
1756   }
1757
1758   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1759                                     const unsigned IVSize,
1760                                     const bool IVSigned) {
1761     if (Ordered) {
1762       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1763                                                             IVSigned);
1764     }
1765   };
1766
1767   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1768                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1769   OuterLoopArgs.IncExpr = S.getInc();
1770   OuterLoopArgs.Init = S.getInit();
1771   OuterLoopArgs.Cond = S.getCond();
1772   OuterLoopArgs.NextLB = S.getNextLowerBound();
1773   OuterLoopArgs.NextUB = S.getNextUpperBound();
1774   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1775                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1776 }
1777
1778 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
1779                              const unsigned IVSize, const bool IVSigned) {}
1780
1781 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1782     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1783     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1784     const CodeGenLoopTy &CodeGenLoopContent) {
1785
1786   auto &RT = CGM.getOpenMPRuntime();
1787
1788   // Emit outer loop.
1789   // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
1790   // dynamic.
1791   //
1792
1793   const Expr *IVExpr = S.getIterationVariable();
1794   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1795   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1796
1797   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1798                               IVSigned, /* Ordered = */ false, LoopArgs.IL,
1799                               LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1800                               LoopArgs.Chunk);
1801
1802   // For combined 'distribute' and 'for' directives, the increment expression
1803   // of distribute is stored in DistInc. For 'distribute' alone, it is in Inc.
1804   Expr *IncExpr;
1805   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1806     IncExpr = S.getDistInc();
1807   else
1808     IncExpr = S.getInc();
1809
1810   // This routine is shared by 'omp distribute parallel for' and
1811   // 'omp distribute': select the right EUB expression depending on the
1812   // directive.
1813   OMPLoopArguments OuterLoopArgs;
1814   OuterLoopArgs.LB = LoopArgs.LB;
1815   OuterLoopArgs.UB = LoopArgs.UB;
1816   OuterLoopArgs.ST = LoopArgs.ST;
1817   OuterLoopArgs.IL = LoopArgs.IL;
1818   OuterLoopArgs.Chunk = LoopArgs.Chunk;
1819   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1820                           ? S.getCombinedEnsureUpperBound()
1821                           : S.getEnsureUpperBound();
1822   OuterLoopArgs.IncExpr = IncExpr;
1823   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1824                            ? S.getCombinedInit()
1825                            : S.getInit();
1826   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1827                            ? S.getCombinedCond()
1828                            : S.getCond();
1829   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1830                              ? S.getCombinedNextLowerBound()
1831                              : S.getNextLowerBound();
1832   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1833                              ? S.getCombinedNextUpperBound()
1834                              : S.getNextUpperBound();
1835
1836   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
1837                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
1838                    emitEmptyOrdered);
1839 }
1840
1841 /// Emit a helper variable and return the corresponding lvalue.
1842 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1843                                const DeclRefExpr *Helper) {
1844   auto VDecl = cast<VarDecl>(Helper->getDecl());
1845   CGF.EmitVarDecl(*VDecl);
1846   return CGF.EmitLValue(Helper);
1847 }
1848
1849 static std::pair<LValue, LValue>
1850 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
1851                                      const OMPExecutableDirective &S) {
1852   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1853   LValue LB =
1854       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
1855   LValue UB =
1856       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
1857
1858   // When composing 'distribute' with 'for' (e.g. as in 'distribute
1859   // parallel for') we need to use the 'distribute'
1860   // chunk lower and upper bounds rather than the whole loop iteration
1861   // space. These are parameters to the outlined function for 'parallel'
1862   // and we copy the bounds of the previous schedule into
1863   // the current ones.
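  // Roughly, the sequence emitted below is (using the helper variables
  // declared above):
  //   LB = (IVType)PrevLB;
  //   UB = (IVType)PrevUB;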
1864   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
1865   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
1866   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
1867   PrevLBVal = CGF.EmitScalarConversion(
1868       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
1869       LS.getIterationVariable()->getType(), SourceLocation());
1870   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
1871   PrevUBVal = CGF.EmitScalarConversion(
1872       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
1873       LS.getIterationVariable()->getType(), SourceLocation());
1874
1875   CGF.EmitStoreOfScalar(PrevLBVal, LB);
1876   CGF.EmitStoreOfScalar(PrevUBVal, UB);
1877
1878   return {LB, UB};
1879 }
1880
1881 /// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided), then
1882 /// we need to use the LB and UB expressions generated by the worksharing
1883 /// code generation support, whereas in non-combined situations we would
1884 /// just emit 0 and the LastIteration expression.
1885 /// This function is necessary due to the difference of the LB and UB
1886 /// types for the RT emission routines for 'for_static_init' and
1887 /// 'for_dispatch_init'.
1888 static std::pair<llvm::Value *, llvm::Value *>
1889 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
1890                                         const OMPExecutableDirective &S,
1891                                         Address LB, Address UB) {
1892   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1893   const Expr *IVExpr = LS.getIterationVariable();
1894   // When implementing a dynamic schedule for a 'for' combined with a
1895   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop is not
1896   // normalized, as each team only executes its own assigned distribute
1897   // chunk.
1898   QualType IteratorTy = IVExpr->getType();
1899   llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
1900                                             SourceLocation());
1901   llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
1902                                             SourceLocation());
1903   return {LBVal, UBVal};
1904 }
1905
1906 static void emitDistributeParallelForDistributeInnerBoundParams(
1907     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1908     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
1909   const auto &Dir = cast<OMPLoopDirective>(S);
1910   LValue LB =
1911       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
1912   auto LBCast = CGF.Builder.CreateIntCast(
1913       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1914   CapturedVars.push_back(LBCast);
1915   LValue UB =
1916       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
1917
1918   auto UBCast = CGF.Builder.CreateIntCast(
1919       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1920   CapturedVars.push_back(UBCast);
1921 }
1922
1923 static void
1924 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
1925                                  const OMPLoopDirective &S,
1926                                  CodeGenFunction::JumpDest LoopExit) {
1927   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
1928                                          PrePostActionTy &) {
1929     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
1930                                emitDistributeParallelForInnerBounds,
1931                                emitDistributeParallelForDispatchBounds);
1932   };
1933
1934   emitCommonOMPParallelDirective(
1935       CGF, S, OMPD_for, CGInlinedWorksharingLoop,
1936       emitDistributeParallelForDistributeInnerBoundParams);
1937 }
1938
1939 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
1940     const OMPDistributeParallelForDirective &S) {
1941   auto &&CodeGen = [&S](CodeGenFunction
&CGF, PrePostActionTy &) { 1942 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 1943 S.getDistInc()); 1944 }; 1945 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1946 OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for, 1947 /*HasCancel=*/false); 1948 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 1949 /*HasCancel=*/false); 1950 } 1951 1952 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( 1953 const OMPDistributeParallelForSimdDirective &S) { 1954 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1955 CGM.getOpenMPRuntime().emitInlinedDirective( 1956 *this, OMPD_distribute_parallel_for_simd, 1957 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1958 OMPLoopScope PreInitScope(CGF, S); 1959 CGF.EmitStmt( 1960 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1961 }); 1962 } 1963 1964 void CodeGenFunction::EmitOMPDistributeSimdDirective( 1965 const OMPDistributeSimdDirective &S) { 1966 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1967 CGM.getOpenMPRuntime().emitInlinedDirective( 1968 *this, OMPD_distribute_simd, 1969 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1970 OMPLoopScope PreInitScope(CGF, S); 1971 CGF.EmitStmt( 1972 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1973 }); 1974 } 1975 1976 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( 1977 const OMPTargetParallelForSimdDirective &S) { 1978 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1979 CGM.getOpenMPRuntime().emitInlinedDirective( 1980 *this, OMPD_target_parallel_for_simd, 1981 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1982 OMPLoopScope PreInitScope(CGF, S); 1983 CGF.EmitStmt( 1984 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1985 }); 1986 } 1987 1988 void CodeGenFunction::EmitOMPTargetSimdDirective( 1989 const OMPTargetSimdDirective &S) { 1990 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1991 CGM.getOpenMPRuntime().emitInlinedDirective( 1992 *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1993 OMPLoopScope PreInitScope(CGF, S); 1994 CGF.EmitStmt( 1995 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1996 }); 1997 } 1998 1999 void CodeGenFunction::EmitOMPTeamsDistributeDirective( 2000 const OMPTeamsDistributeDirective &S) { 2001 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2002 CGM.getOpenMPRuntime().emitInlinedDirective( 2003 *this, OMPD_teams_distribute, 2004 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2005 OMPLoopScope PreInitScope(CGF, S); 2006 CGF.EmitStmt( 2007 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2008 }); 2009 } 2010 2011 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( 2012 const OMPTeamsDistributeSimdDirective &S) { 2013 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2014 CGM.getOpenMPRuntime().emitInlinedDirective( 2015 *this, OMPD_teams_distribute_simd, 2016 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2017 OMPLoopScope PreInitScope(CGF, S); 2018 CGF.EmitStmt( 2019 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2020 }); 2021 } 2022 2023 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( 2024 const OMPTeamsDistributeParallelForSimdDirective &S) { 2025 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2026 CGM.getOpenMPRuntime().emitInlinedDirective( 2027 *this, OMPD_teams_distribute_parallel_for_simd, 2028 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2029 OMPLoopScope PreInitScope(CGF, S); 2030 CGF.EmitStmt( 2031 
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2032 }); 2033 } 2034 2035 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( 2036 const OMPTeamsDistributeParallelForDirective &S) { 2037 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2038 CGM.getOpenMPRuntime().emitInlinedDirective( 2039 *this, OMPD_teams_distribute_parallel_for, 2040 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2041 OMPLoopScope PreInitScope(CGF, S); 2042 CGF.EmitStmt( 2043 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2044 }); 2045 } 2046 2047 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( 2048 const OMPTargetTeamsDistributeDirective &S) { 2049 CGM.getOpenMPRuntime().emitInlinedDirective( 2050 *this, OMPD_target_teams_distribute, 2051 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2052 CGF.EmitStmt( 2053 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2054 }); 2055 } 2056 2057 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( 2058 const OMPTargetTeamsDistributeParallelForDirective &S) { 2059 CGM.getOpenMPRuntime().emitInlinedDirective( 2060 *this, OMPD_target_teams_distribute_parallel_for, 2061 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2062 CGF.EmitStmt( 2063 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2064 }); 2065 } 2066 2067 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( 2068 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 2069 CGM.getOpenMPRuntime().emitInlinedDirective( 2070 *this, OMPD_target_teams_distribute_parallel_for_simd, 2071 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2072 CGF.EmitStmt( 2073 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2074 }); 2075 } 2076 2077 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( 2078 const OMPTargetTeamsDistributeSimdDirective &S) { 2079 CGM.getOpenMPRuntime().emitInlinedDirective( 2080 *this, OMPD_target_teams_distribute_simd, 2081 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2082 CGF.EmitStmt( 2083 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2084 }); 2085 } 2086 2087 namespace { 2088 struct ScheduleKindModifiersTy { 2089 OpenMPScheduleClauseKind Kind; 2090 OpenMPScheduleClauseModifier M1; 2091 OpenMPScheduleClauseModifier M2; 2092 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, 2093 OpenMPScheduleClauseModifier M1, 2094 OpenMPScheduleClauseModifier M2) 2095 : Kind(Kind), M1(M1), M2(M2) {} 2096 }; 2097 } // namespace 2098 2099 bool CodeGenFunction::EmitOMPWorksharingLoop( 2100 const OMPLoopDirective &S, Expr *EUB, 2101 const CodeGenLoopBoundsTy &CodeGenLoopBounds, 2102 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 2103 // Emit the loop iteration variable. 2104 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2105 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2106 EmitVarDecl(*IVDecl); 2107 2108 // Emit the iterations count variable. 2109 // If it is not a variable, Sema decided to calculate iterations count on each 2110 // iteration (e.g., it is foldable into a constant). 2111 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2112 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2113 // Emit calculation of the iterations count. 2114 EmitIgnoredExpr(S.getCalcLastIteration()); 2115 } 2116 2117 auto &RT = CGM.getOpenMPRuntime(); 2118 2119 bool HasLastprivateClause; 2120 // Check pre-condition. 2121 { 2122 OMPLoopScope PreInitScope(*this, S); 2123 // Skip the entire loop if we don't meet the precondition. 
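    // (For a canonical loop 'for (i = a; i < b; ++i)' the precondition is
    // roughly 'a < b', i.e. the loop body runs at least once.)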
2124 // If the condition constant folds and can be elided, avoid emitting the 2125 // whole loop. 2126 bool CondConstant; 2127 llvm::BasicBlock *ContBlock = nullptr; 2128 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2129 if (!CondConstant) 2130 return false; 2131 } else { 2132 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2133 ContBlock = createBasicBlock("omp.precond.end"); 2134 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2135 getProfileCount(&S)); 2136 EmitBlock(ThenBlock); 2137 incrementProfileCounter(&S); 2138 } 2139 2140 bool Ordered = false; 2141 if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 2142 if (OrderedClause->getNumForLoops()) 2143 RT.emitDoacrossInit(*this, S); 2144 else 2145 Ordered = true; 2146 } 2147 2148 llvm::DenseSet<const Expr *> EmittedFinals; 2149 emitAlignedClause(*this, S); 2150 EmitOMPLinearClauseInit(S); 2151 // Emit helper vars inits. 2152 2153 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); 2154 LValue LB = Bounds.first; 2155 LValue UB = Bounds.second; 2156 LValue ST = 2157 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2158 LValue IL = 2159 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2160 2161 // Emit 'then' code. 2162 { 2163 OMPPrivateScope LoopScope(*this); 2164 if (EmitOMPFirstprivateClause(S, LoopScope)) { 2165 // Emit implicit barrier to synchronize threads and avoid data races on 2166 // initialization of firstprivate variables and post-update of 2167 // lastprivate variables. 2168 CGM.getOpenMPRuntime().emitBarrierCall( 2169 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2170 /*ForceSimpleCall=*/true); 2171 } 2172 EmitOMPPrivateClause(S, LoopScope); 2173 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2174 EmitOMPReductionClauseInit(S, LoopScope); 2175 EmitOMPPrivateLoopCounters(S, LoopScope); 2176 EmitOMPLinearClause(S, LoopScope); 2177 (void)LoopScope.Privatize(); 2178 2179 // Detect the loop schedule kind and chunk. 2180 llvm::Value *Chunk = nullptr; 2181 OpenMPScheduleTy ScheduleKind; 2182 if (auto *C = S.getSingleClause<OMPScheduleClause>()) { 2183 ScheduleKind.Schedule = C->getScheduleKind(); 2184 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2185 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2186 if (const auto *Ch = C->getChunkSize()) { 2187 Chunk = EmitScalarExpr(Ch); 2188 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2189 S.getIterationVariable()->getType(), 2190 S.getLocStart()); 2191 } 2192 } 2193 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2194 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2195 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2196 // If the static schedule kind is specified or if the ordered clause is 2197 // specified, and if no monotonic modifier is specified, the effect will 2198 // be as if the monotonic modifier was specified. 2199 if (RT.isStaticNonchunked(ScheduleKind.Schedule, 2200 /* Chunked */ Chunk != nullptr) && 2201 !Ordered) { 2202 if (isOpenMPSimdDirective(S.getDirectiveKind())) 2203 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2204 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2205 // When no chunk_size is specified, the iteration space is divided into 2206 // chunks that are approximately equal in size, and at most one chunk is 2207 // distributed to each thread. Note that the size of the chunks is 2208 // unspecified in this case. 
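        // Roughly, the code emitted below is (runtime entry points follow
        // the __kmpc naming convention; the exact suffixes depend on the IV
        // width and signedness):
        //   __kmpc_for_static_init_(4|8)[u](..., &IL, &LB, &UB, &ST, ...);
        //   UB = min(UB, GlobalUB); IV = LB;
        //   while (IV <= UB) { BODY; ++IV; }
        //   __kmpc_for_static_fini(...);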
2209 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, 2210 IVSize, IVSigned, Ordered, 2211 IL.getAddress(), LB.getAddress(), 2212 UB.getAddress(), ST.getAddress()); 2213 auto LoopExit = 2214 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2215 // UB = min(UB, GlobalUB); 2216 EmitIgnoredExpr(S.getEnsureUpperBound()); 2217 // IV = LB; 2218 EmitIgnoredExpr(S.getInit()); 2219 // while (idx <= UB) { BODY; ++idx; } 2220 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 2221 S.getInc(), 2222 [&S, LoopExit](CodeGenFunction &CGF) { 2223 CGF.EmitOMPLoopBody(S, LoopExit); 2224 CGF.EmitStopPoint(&S); 2225 }, 2226 [](CodeGenFunction &) {}); 2227 EmitBlock(LoopExit.getBlock()); 2228 // Tell the runtime we are done. 2229 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2230 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 2231 }; 2232 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2233 } else { 2234 const bool IsMonotonic = 2235 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || 2236 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || 2237 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 2238 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 2239 // Emit the outer loop, which requests its work chunk [LB..UB] from 2240 // runtime and runs the inner loop to process it. 2241 const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), 2242 ST.getAddress(), IL.getAddress(), 2243 Chunk, EUB); 2244 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 2245 LoopArguments, CGDispatchBounds); 2246 } 2247 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2248 EmitOMPSimdFinal(S, 2249 [&](CodeGenFunction &CGF) -> llvm::Value * { 2250 return CGF.Builder.CreateIsNotNull( 2251 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2252 }); 2253 } 2254 EmitOMPReductionClauseFinal( 2255 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 2256 ? /*Parallel and Simd*/ OMPD_parallel_for_simd 2257 : /*Parallel only*/ OMPD_parallel); 2258 // Emit post-update of the reduction variables if IsLastIter != 0. 2259 emitPostUpdateForReductionClause( 2260 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2261 return CGF.Builder.CreateIsNotNull( 2262 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2263 }); 2264 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2265 if (HasLastprivateClause) 2266 EmitOMPLastprivateClauseFinal( 2267 S, isOpenMPSimdDirective(S.getDirectiveKind()), 2268 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 2269 } 2270 EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2271 return CGF.Builder.CreateIsNotNull( 2272 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2273 }); 2274 // We're now done with the loop, so jump to the continuation block. 2275 if (ContBlock) { 2276 EmitBranch(ContBlock); 2277 EmitBlock(ContBlock, true); 2278 } 2279 } 2280 return HasLastprivateClause; 2281 } 2282 2283 /// The following two functions generate expressions for the loop lower 2284 /// and upper bounds in case of static and dynamic (dispatch) schedule 2285 /// of the associated 'for' or 'distribute' loop. 
2286 static std::pair<LValue, LValue> 2287 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 2288 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2289 LValue LB = 2290 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2291 LValue UB = 2292 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2293 return {LB, UB}; 2294 } 2295 2296 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not 2297 /// consider the lower and upper bound expressions generated by the 2298 /// worksharing loop support, but we use 0 and the iteration space size as 2299 /// constants 2300 static std::pair<llvm::Value *, llvm::Value *> 2301 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, 2302 Address LB, Address UB) { 2303 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2304 const Expr *IVExpr = LS.getIterationVariable(); 2305 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); 2306 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); 2307 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); 2308 return {LBVal, UBVal}; 2309 } 2310 2311 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 2312 bool HasLastprivates = false; 2313 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2314 PrePostActionTy &) { 2315 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 2316 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2317 emitForLoopBounds, 2318 emitDispatchForLoopBounds); 2319 }; 2320 { 2321 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2322 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 2323 S.hasCancel()); 2324 } 2325 2326 // Emit an implicit barrier at the end. 2327 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2328 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2329 } 2330 } 2331 2332 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 2333 bool HasLastprivates = false; 2334 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2335 PrePostActionTy &) { 2336 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2337 emitForLoopBounds, 2338 emitDispatchForLoopBounds); 2339 }; 2340 { 2341 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2342 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2343 } 2344 2345 // Emit an implicit barrier at the end. 2346 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2347 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2348 } 2349 } 2350 2351 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 2352 const Twine &Name, 2353 llvm::Value *Init = nullptr) { 2354 auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 2355 if (Init) 2356 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); 2357 return LVal; 2358 } 2359 2360 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 2361 auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); 2362 auto *CS = dyn_cast<CompoundStmt>(Stmt); 2363 bool HasLastprivates = false; 2364 auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF, 2365 PrePostActionTy &) { 2366 auto &C = CGF.CGM.getContext(); 2367 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2368 // Emit helper vars inits. 
2369 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 2370 CGF.Builder.getInt32(0)); 2371 auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1) 2372 : CGF.Builder.getInt32(0); 2373 LValue UB = 2374 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 2375 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 2376 CGF.Builder.getInt32(1)); 2377 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 2378 CGF.Builder.getInt32(0)); 2379 // Loop counter. 2380 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 2381 OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2382 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 2383 OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2384 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 2385 // Generate condition for loop. 2386 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 2387 OK_Ordinary, S.getLocStart(), FPOptions()); 2388 // Increment for loop counter. 2389 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 2390 S.getLocStart()); 2391 auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) { 2392 // Iterate through all sections and emit a switch construct: 2393 // switch (IV) { 2394 // case 0: 2395 // <SectionStmt[0]>; 2396 // break; 2397 // ... 2398 // case <NumSection> - 1: 2399 // <SectionStmt[<NumSection> - 1]>; 2400 // break; 2401 // } 2402 // .omp.sections.exit: 2403 auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 2404 auto *SwitchStmt = CGF.Builder.CreateSwitch( 2405 CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, 2406 CS == nullptr ? 1 : CS->size()); 2407 if (CS) { 2408 unsigned CaseNumber = 0; 2409 for (auto *SubStmt : CS->children()) { 2410 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2411 CGF.EmitBlock(CaseBB); 2412 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 2413 CGF.EmitStmt(SubStmt); 2414 CGF.EmitBranch(ExitBB); 2415 ++CaseNumber; 2416 } 2417 } else { 2418 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2419 CGF.EmitBlock(CaseBB); 2420 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 2421 CGF.EmitStmt(Stmt); 2422 CGF.EmitBranch(ExitBB); 2423 } 2424 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 2425 }; 2426 2427 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2428 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 2429 // Emit implicit barrier to synchronize threads and avoid data races on 2430 // initialization of firstprivate variables and post-update of lastprivate 2431 // variables. 2432 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 2433 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2434 /*ForceSimpleCall=*/true); 2435 } 2436 CGF.EmitOMPPrivateClause(S, LoopScope); 2437 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2438 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2439 (void)LoopScope.Privatize(); 2440 2441 // Emit static non-chunked loop. 
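    // The sections region is thus lowered as a statically scheduled loop
    // over the section indices: each thread receives a subrange [LB, UB] of
    // case labels and runs the switch above once per index in that subrange.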
2442 OpenMPScheduleTy ScheduleKind; 2443 ScheduleKind.Schedule = OMPC_SCHEDULE_static; 2444 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 2445 CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32, 2446 /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(), 2447 UB.getAddress(), ST.getAddress()); 2448 // UB = min(UB, GlobalUB); 2449 auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); 2450 auto *MinUBGlobalUB = CGF.Builder.CreateSelect( 2451 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 2452 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 2453 // IV = LB; 2454 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); 2455 // while (idx <= UB) { BODY; ++idx; } 2456 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, 2457 [](CodeGenFunction &) {}); 2458 // Tell the runtime we are done. 2459 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2460 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 2461 }; 2462 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); 2463 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 2464 // Emit post-update of the reduction variables if IsLastIter != 0. 2465 emitPostUpdateForReductionClause( 2466 CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2467 return CGF.Builder.CreateIsNotNull( 2468 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2469 }); 2470 2471 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2472 if (HasLastprivates) 2473 CGF.EmitOMPLastprivateClauseFinal( 2474 S, /*NoFinals=*/false, 2475 CGF.Builder.CreateIsNotNull( 2476 CGF.EmitLoadOfScalar(IL, S.getLocStart()))); 2477 }; 2478 2479 bool HasCancel = false; 2480 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 2481 HasCancel = OSD->hasCancel(); 2482 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 2483 HasCancel = OPSD->hasCancel(); 2484 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); 2485 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 2486 HasCancel); 2487 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 2488 // clause. Otherwise the barrier will be generated by the codegen for the 2489 // directive. 2490 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 2491 // Emit implicit barrier to synchronize threads and avoid data races on 2492 // initialization of firstprivate variables. 2493 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2494 OMPD_unknown); 2495 } 2496 } 2497 2498 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 2499 { 2500 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2501 EmitSections(S); 2502 } 2503 // Emit an implicit barrier at the end. 
2504 if (!S.getSingleClause<OMPNowaitClause>()) { 2505 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2506 OMPD_sections); 2507 } 2508 } 2509 2510 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 2511 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2512 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2513 }; 2514 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2515 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 2516 S.hasCancel()); 2517 } 2518 2519 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 2520 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 2521 llvm::SmallVector<const Expr *, 8> DestExprs; 2522 llvm::SmallVector<const Expr *, 8> SrcExprs; 2523 llvm::SmallVector<const Expr *, 8> AssignmentOps; 2524 // Check if there are any 'copyprivate' clauses associated with this 2525 // 'single' construct. 2526 // Build a list of copyprivate variables along with helper expressions 2527 // (<source>, <destination>, <destination>=<source> expressions) 2528 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 2529 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 2530 DestExprs.append(C->destination_exprs().begin(), 2531 C->destination_exprs().end()); 2532 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 2533 AssignmentOps.append(C->assignment_ops().begin(), 2534 C->assignment_ops().end()); 2535 } 2536 // Emit code for 'single' region along with 'copyprivate' clauses 2537 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2538 Action.Enter(CGF); 2539 OMPPrivateScope SingleScope(CGF); 2540 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 2541 CGF.EmitOMPPrivateClause(S, SingleScope); 2542 (void)SingleScope.Privatize(); 2543 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2544 }; 2545 { 2546 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2547 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 2548 CopyprivateVars, DestExprs, 2549 SrcExprs, AssignmentOps); 2550 } 2551 // Emit an implicit barrier at the end (to avoid data race on firstprivate 2552 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 2553 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 2554 CGM.getOpenMPRuntime().emitBarrierCall( 2555 *this, S.getLocStart(), 2556 S.getSingleClause<OMPNowaitClause>() ? 
OMPD_unknown : OMPD_single); 2557 } 2558 } 2559 2560 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 2561 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2562 Action.Enter(CGF); 2563 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2564 }; 2565 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2566 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 2567 } 2568 2569 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 2570 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2571 Action.Enter(CGF); 2572 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2573 }; 2574 Expr *Hint = nullptr; 2575 if (auto *HintClause = S.getSingleClause<OMPHintClause>()) 2576 Hint = HintClause->getHint(); 2577 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2578 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 2579 S.getDirectiveName().getAsString(), 2580 CodeGen, S.getLocStart(), Hint); 2581 } 2582 2583 void CodeGenFunction::EmitOMPParallelForDirective( 2584 const OMPParallelForDirective &S) { 2585 // Emit directive as a combined directive that consists of two implicit 2586 // directives: 'parallel' with 'for' directive. 2587 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2588 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); 2589 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2590 emitDispatchForLoopBounds); 2591 }; 2592 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, 2593 emitEmptyBoundParameters); 2594 } 2595 2596 void CodeGenFunction::EmitOMPParallelForSimdDirective( 2597 const OMPParallelForSimdDirective &S) { 2598 // Emit directive as a combined directive that consists of two implicit 2599 // directives: 'parallel' with 'for' directive. 2600 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2601 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2602 emitDispatchForLoopBounds); 2603 }; 2604 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, 2605 emitEmptyBoundParameters); 2606 } 2607 2608 void CodeGenFunction::EmitOMPParallelSectionsDirective( 2609 const OMPParallelSectionsDirective &S) { 2610 // Emit directive as a combined directive that consists of two implicit 2611 // directives: 'parallel' with 'sections' directive. 2612 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2613 CGF.EmitSections(S); 2614 }; 2615 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, 2616 emitEmptyBoundParameters); 2617 } 2618 2619 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, 2620 const RegionCodeGenTy &BodyGen, 2621 const TaskGenTy &TaskGen, 2622 OMPTaskDataTy &Data) { 2623 // Emit outlined function for task construct. 2624 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2625 auto *I = CS->getCapturedDecl()->param_begin(); 2626 auto *PartId = std::next(I); 2627 auto *TaskT = std::next(I, 4); 2628 // Check if the task is final 2629 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 2630 // If the condition constant folds and can be elided, try to avoid emitting 2631 // the condition and the dead arm of the if/else. 
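    // E.g. 'final(0)' or 'final(1)' folds to a constant and needs no runtime
    // check, while 'final(n > 10)' is evaluated via EvaluateExprAsBool below.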
2632 auto *Cond = Clause->getCondition(); 2633 bool CondConstant; 2634 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 2635 Data.Final.setInt(CondConstant); 2636 else 2637 Data.Final.setPointer(EvaluateExprAsBool(Cond)); 2638 } else { 2639 // By default the task is not final. 2640 Data.Final.setInt(/*IntVal=*/false); 2641 } 2642 // Check if the task has 'priority' clause. 2643 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 2644 auto *Prio = Clause->getPriority(); 2645 Data.Priority.setInt(/*IntVal=*/true); 2646 Data.Priority.setPointer(EmitScalarConversion( 2647 EmitScalarExpr(Prio), Prio->getType(), 2648 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 2649 Prio->getExprLoc())); 2650 } 2651 // The first function argument for tasks is a thread id, the second one is a 2652 // part id (0 for tied tasks, >=0 for untied task). 2653 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 2654 // Get list of private variables. 2655 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 2656 auto IRef = C->varlist_begin(); 2657 for (auto *IInit : C->private_copies()) { 2658 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2659 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2660 Data.PrivateVars.push_back(*IRef); 2661 Data.PrivateCopies.push_back(IInit); 2662 } 2663 ++IRef; 2664 } 2665 } 2666 EmittedAsPrivate.clear(); 2667 // Get list of firstprivate variables. 2668 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 2669 auto IRef = C->varlist_begin(); 2670 auto IElemInitRef = C->inits().begin(); 2671 for (auto *IInit : C->private_copies()) { 2672 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2673 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2674 Data.FirstprivateVars.push_back(*IRef); 2675 Data.FirstprivateCopies.push_back(IInit); 2676 Data.FirstprivateInits.push_back(*IElemInitRef); 2677 } 2678 ++IRef; 2679 ++IElemInitRef; 2680 } 2681 } 2682 // Get list of lastprivate variables (for taskloops). 
2683 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 2684 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 2685 auto IRef = C->varlist_begin(); 2686 auto ID = C->destination_exprs().begin(); 2687 for (auto *IInit : C->private_copies()) { 2688 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2689 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2690 Data.LastprivateVars.push_back(*IRef); 2691 Data.LastprivateCopies.push_back(IInit); 2692 } 2693 LastprivateDstsOrigs.insert( 2694 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 2695 cast<DeclRefExpr>(*IRef)}); 2696 ++IRef; 2697 ++ID; 2698 } 2699 } 2700 SmallVector<const Expr *, 4> LHSs; 2701 SmallVector<const Expr *, 4> RHSs; 2702 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 2703 auto IPriv = C->privates().begin(); 2704 auto IRed = C->reduction_ops().begin(); 2705 auto ILHS = C->lhs_exprs().begin(); 2706 auto IRHS = C->rhs_exprs().begin(); 2707 for (const auto *Ref : C->varlists()) { 2708 Data.ReductionVars.emplace_back(Ref); 2709 Data.ReductionCopies.emplace_back(*IPriv); 2710 Data.ReductionOps.emplace_back(*IRed); 2711 LHSs.emplace_back(*ILHS); 2712 RHSs.emplace_back(*IRHS); 2713 std::advance(IPriv, 1); 2714 std::advance(IRed, 1); 2715 std::advance(ILHS, 1); 2716 std::advance(IRHS, 1); 2717 } 2718 } 2719 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( 2720 *this, S.getLocStart(), LHSs, RHSs, Data); 2721 // Build list of dependences. 2722 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) 2723 for (auto *IRef : C->varlists()) 2724 Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); 2725 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs]( 2726 CodeGenFunction &CGF, PrePostActionTy &Action) { 2727 // Set proper addresses for generated private copies. 2728 OMPPrivateScope Scope(CGF); 2729 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || 2730 !Data.LastprivateVars.empty()) { 2731 auto *CopyFn = CGF.Builder.CreateLoad( 2732 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); 2733 auto *PrivatesPtr = CGF.Builder.CreateLoad( 2734 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); 2735 // Map privates. 
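      // CopyFn is the task's privates-map helper: it is called with the
      // privates block pointer followed by one out-pointer per private,
      // firstprivate and lastprivate variable, and fills each out-pointer
      // with the address of the corresponding copy inside the task's
      // private data block.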
2736       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2737       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2738       CallArgs.push_back(PrivatesPtr);
2739       for (auto *E : Data.PrivateVars) {
2740         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2741         Address PrivatePtr = CGF.CreateMemTemp(
2742             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2743         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2744         CallArgs.push_back(PrivatePtr.getPointer());
2745       }
2746       for (auto *E : Data.FirstprivateVars) {
2747         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2748         Address PrivatePtr =
2749             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2750                               ".firstpriv.ptr.addr");
2751         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2752         CallArgs.push_back(PrivatePtr.getPointer());
2753       }
2754       for (auto *E : Data.LastprivateVars) {
2755         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2756         Address PrivatePtr =
2757             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2758                               ".lastpriv.ptr.addr");
2759         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2760         CallArgs.push_back(PrivatePtr.getPointer());
2761       }
2762       CGF.EmitRuntimeCall(CopyFn, CallArgs);
2763       for (auto &&Pair : LastprivateDstsOrigs) {
2764         auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2765         DeclRefExpr DRE(
2766             const_cast<VarDecl *>(OrigVD),
2767             /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2768                 OrigVD) != nullptr,
2769             Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2770         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2771           return CGF.EmitLValue(&DRE).getAddress();
2772         });
2773       }
2774       for (auto &&Pair : PrivatePtrs) {
2775         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2776                             CGF.getContext().getDeclAlign(Pair.first));
2777         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2778       }
2779     }
2780     if (Data.Reductions) {
2781       OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
2782       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2783                              Data.ReductionOps);
2784       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2785           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2786       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2787         RedCG.emitSharedLValue(CGF, Cnt);
2788         RedCG.emitAggregateType(CGF, Cnt);
2789         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2790             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2791         Replacement =
2792             Address(CGF.EmitScalarConversion(
2793                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2794                         CGF.getContext().getPointerType(
2795                             Data.ReductionCopies[Cnt]->getType()),
2796                         SourceLocation()),
2797                     Replacement.getAlignment());
2798         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2799         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2800                          [Replacement]() { return Replacement; });
2801         // FIXME: This must be removed once the runtime library is fixed.
2802         // Emit required threadprivate variables for
2803         // initializer/combiner/finalizer.
2804         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2805                                                            RedCG, Cnt);
2806       }
2807     }
2808     // Privatize all private variables except for in_reduction items.
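    // (The in_reduction items are privatized separately below, once the
    // taskgroup descriptors they depend on are available.)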
2809 (void)Scope.Privatize();
2810 SmallVector<const Expr *, 4> InRedVars;
2811 SmallVector<const Expr *, 4> InRedPrivs;
2812 SmallVector<const Expr *, 4> InRedOps;
2813 SmallVector<const Expr *, 4> TaskgroupDescriptors;
2814 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
2815 auto IPriv = C->privates().begin();
2816 auto IRed = C->reduction_ops().begin();
2817 auto ITD = C->taskgroup_descriptors().begin();
2818 for (const auto *Ref : C->varlists()) {
2819 InRedVars.emplace_back(Ref);
2820 InRedPrivs.emplace_back(*IPriv);
2821 InRedOps.emplace_back(*IRed);
2822 TaskgroupDescriptors.emplace_back(*ITD);
2823 std::advance(IPriv, 1);
2824 std::advance(IRed, 1);
2825 std::advance(ITD, 1);
2826 }
2827 }
2828 // Privatize in_reduction items here, because taskgroup descriptors must be
2829 // privatized earlier.
2830 OMPPrivateScope InRedScope(CGF);
2831 if (!InRedVars.empty()) {
2832 ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
2833 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
2834 RedCG.emitSharedLValue(CGF, Cnt);
2835 RedCG.emitAggregateType(CGF, Cnt);
2836 // The taskgroup descriptor variable is always implicit firstprivate and
2837 // privatized already during processing of the firstprivates.
2838 llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
2839 CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
2840 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2841 CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2842 Replacement = Address(
2843 CGF.EmitScalarConversion(
2844 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2845 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
2846 SourceLocation()),
2847 Replacement.getAlignment());
2848 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2849 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
2850 [Replacement]() { return Replacement; });
2851 // FIXME: This must be removed once the runtime library is fixed.
2852 // Emit required threadprivate variables for
2853 // initializer/combiner/finalizer.
2854 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2855 RedCG, Cnt);
2856 }
2857 }
2858 (void)InRedScope.Privatize();
2859
2860 Action.Enter(CGF);
2861 BodyGen(CGF);
2862 };
2863 auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2864 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
2865 Data.NumberOfParts);
2866 OMPLexicalScope Scope(*this, S);
2867 TaskGen(*this, OutlinedFn, Data);
2868 }
2869
2870 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
2871 // Emit outlined function for task construct.
2872 auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2873 auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
2874 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
2875 const Expr *IfCond = nullptr;
2876 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2877 if (C->getNameModifier() == OMPD_unknown ||
2878 C->getNameModifier() == OMPD_task) {
2879 IfCond = C->getCondition();
2880 break;
2881 }
2882 }
2883
2884 OMPTaskDataTy Data;
2885 // Check if we should emit tied or untied task.
2886 Data.Tied = !S.getSingleClause<OMPUntiedClause>(); 2887 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 2888 CGF.EmitStmt(CS->getCapturedStmt()); 2889 }; 2890 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 2891 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 2892 const OMPTaskDataTy &Data) { 2893 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn, 2894 SharedsTy, CapturedStruct, IfCond, 2895 Data); 2896 }; 2897 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 2898 } 2899 2900 void CodeGenFunction::EmitOMPTaskyieldDirective( 2901 const OMPTaskyieldDirective &S) { 2902 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); 2903 } 2904 2905 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 2906 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); 2907 } 2908 2909 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 2910 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart()); 2911 } 2912 2913 void CodeGenFunction::EmitOMPTaskgroupDirective( 2914 const OMPTaskgroupDirective &S) { 2915 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2916 Action.Enter(CGF); 2917 if (const Expr *E = S.getReductionRef()) { 2918 SmallVector<const Expr *, 4> LHSs; 2919 SmallVector<const Expr *, 4> RHSs; 2920 OMPTaskDataTy Data; 2921 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { 2922 auto IPriv = C->privates().begin(); 2923 auto IRed = C->reduction_ops().begin(); 2924 auto ILHS = C->lhs_exprs().begin(); 2925 auto IRHS = C->rhs_exprs().begin(); 2926 for (const auto *Ref : C->varlists()) { 2927 Data.ReductionVars.emplace_back(Ref); 2928 Data.ReductionCopies.emplace_back(*IPriv); 2929 Data.ReductionOps.emplace_back(*IRed); 2930 LHSs.emplace_back(*ILHS); 2931 RHSs.emplace_back(*IRHS); 2932 std::advance(IPriv, 1); 2933 std::advance(IRed, 1); 2934 std::advance(ILHS, 1); 2935 std::advance(IRHS, 1); 2936 } 2937 } 2938 llvm::Value *ReductionDesc = 2939 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(), 2940 LHSs, RHSs, Data); 2941 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2942 CGF.EmitVarDecl(*VD); 2943 CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD), 2944 /*Volatile=*/false, E->getType()); 2945 } 2946 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2947 }; 2948 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2949 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); 2950 } 2951 2952 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 2953 CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { 2954 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) { 2955 return llvm::makeArrayRef(FlushClause->varlist_begin(), 2956 FlushClause->varlist_end()); 2957 } 2958 return llvm::None; 2959 }(), S.getLocStart()); 2960 } 2961 2962 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, 2963 const CodeGenLoopTy &CodeGenLoop, 2964 Expr *IncExpr) { 2965 // Emit the loop iteration variable. 2966 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2967 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2968 EmitVarDecl(*IVDecl); 2969 2970 // Emit the iterations count variable. 2971 // If it is not a variable, Sema decided to calculate iterations count on each 2972 // iteration (e.g., it is foldable into a constant). 
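// For example, in 'for (int i = 0; i < 8; ++i)' the last iteration count folds
// to the constant 8 and no count variable is emitted, while for an upper bound
// of 'n' Sema provides a dedicated variable whose value is computed here.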
2973 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2974 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2975 // Emit calculation of the iterations count. 2976 EmitIgnoredExpr(S.getCalcLastIteration()); 2977 } 2978 2979 auto &RT = CGM.getOpenMPRuntime(); 2980 2981 bool HasLastprivateClause = false; 2982 // Check pre-condition. 2983 { 2984 OMPLoopScope PreInitScope(*this, S); 2985 // Skip the entire loop if we don't meet the precondition. 2986 // If the condition constant folds and can be elided, avoid emitting the 2987 // whole loop. 2988 bool CondConstant; 2989 llvm::BasicBlock *ContBlock = nullptr; 2990 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2991 if (!CondConstant) 2992 return; 2993 } else { 2994 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2995 ContBlock = createBasicBlock("omp.precond.end"); 2996 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2997 getProfileCount(&S)); 2998 EmitBlock(ThenBlock); 2999 incrementProfileCounter(&S); 3000 } 3001 3002 // Emit 'then' code. 3003 { 3004 // Emit helper vars inits. 3005 3006 LValue LB = EmitOMPHelperVar( 3007 *this, cast<DeclRefExpr>( 3008 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3009 ? S.getCombinedLowerBoundVariable() 3010 : S.getLowerBoundVariable()))); 3011 LValue UB = EmitOMPHelperVar( 3012 *this, cast<DeclRefExpr>( 3013 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3014 ? S.getCombinedUpperBoundVariable() 3015 : S.getUpperBoundVariable()))); 3016 LValue ST = 3017 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 3018 LValue IL = 3019 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 3020 3021 OMPPrivateScope LoopScope(*this); 3022 if (EmitOMPFirstprivateClause(S, LoopScope)) { 3023 // Emit implicit barrier to synchronize threads and avoid data races on 3024 // initialization of firstprivate variables and post-update of 3025 // lastprivate variables. 3026 CGM.getOpenMPRuntime().emitBarrierCall( 3027 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 3028 /*ForceSimpleCall=*/true); 3029 } 3030 EmitOMPPrivateClause(S, LoopScope); 3031 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 3032 EmitOMPPrivateLoopCounters(S, LoopScope); 3033 (void)LoopScope.Privatize(); 3034 3035 // Detect the distribute schedule kind and chunk. 3036 llvm::Value *Chunk = nullptr; 3037 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; 3038 if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) { 3039 ScheduleKind = C->getDistScheduleKind(); 3040 if (const auto *Ch = C->getChunkSize()) { 3041 Chunk = EmitScalarExpr(Ch); 3042 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 3043 S.getIterationVariable()->getType(), 3044 S.getLocStart()); 3045 } 3046 } 3047 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 3048 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 3049 3050 // OpenMP [2.10.8, distribute Construct, Description] 3051 // If dist_schedule is specified, kind must be static. If specified, 3052 // iterations are divided into chunks of size chunk_size, chunks are 3053 // assigned to the teams of the league in a round-robin fashion in the 3054 // order of the team number. When no chunk_size is specified, the 3055 // iteration space is divided into chunks that are approximately equal 3056 // in size, and at most one chunk is distributed to each team of the 3057 // league. The size of the chunks is unspecified in this case. 
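// For instance (a sketch of the rule above): dist_schedule(static, 4) with 10
// iterations and 2 teams yields chunks {0..3}, {4..7}, {8..9}; round-robin
// assignment gives team 0 the chunks {0..3} and {8..9} and team 1 the chunk
// {4..7}. With no chunk_size, each team receives at most one chunk of roughly
// 10/2 = 5 iterations.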
3058 if (RT.isStaticNonchunked(ScheduleKind, 3059 /* Chunked */ Chunk != nullptr)) { 3060 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, 3061 IVSize, IVSigned, /* Ordered = */ false, 3062 IL.getAddress(), LB.getAddress(), 3063 UB.getAddress(), ST.getAddress()); 3064 auto LoopExit = 3065 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 3066 // UB = min(UB, GlobalUB); 3067 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3068 ? S.getCombinedEnsureUpperBound() 3069 : S.getEnsureUpperBound()); 3070 // IV = LB; 3071 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3072 ? S.getCombinedInit() 3073 : S.getInit()); 3074 3075 Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3076 ? S.getCombinedCond() 3077 : S.getCond(); 3078 3079 // for distribute alone, codegen 3080 // while (idx <= UB) { BODY; ++idx; } 3081 // when combined with 'for' (e.g. as in 'distribute parallel for') 3082 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 3083 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr, 3084 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 3085 CodeGenLoop(CGF, S, LoopExit); 3086 }, 3087 [](CodeGenFunction &) {}); 3088 EmitBlock(LoopExit.getBlock()); 3089 // Tell the runtime we are done. 3090 RT.emitForStaticFinish(*this, S.getLocStart()); 3091 } else { 3092 // Emit the outer loop, which requests its work chunk [LB..UB] from 3093 // runtime and runs the inner loop to process it. 3094 const OMPLoopArguments LoopArguments = { 3095 LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), 3096 Chunk}; 3097 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, 3098 CodeGenLoop); 3099 } 3100 3101 // Emit final copy of the lastprivate variables if IsLastIter != 0. 3102 if (HasLastprivateClause) 3103 EmitOMPLastprivateClauseFinal( 3104 S, /*NoFinals=*/false, 3105 Builder.CreateIsNotNull( 3106 EmitLoadOfScalar(IL, S.getLocStart()))); 3107 } 3108 3109 // We're now done with the loop, so jump to the continuation block. 
3110 if (ContBlock) { 3111 EmitBranch(ContBlock); 3112 EmitBlock(ContBlock, true); 3113 } 3114 } 3115 } 3116 3117 void CodeGenFunction::EmitOMPDistributeDirective( 3118 const OMPDistributeDirective &S) { 3119 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3120 3121 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 3122 }; 3123 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3124 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 3125 false); 3126 } 3127 3128 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 3129 const CapturedStmt *S) { 3130 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 3131 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 3132 CGF.CapturedStmtInfo = &CapStmtInfo; 3133 auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); 3134 Fn->addFnAttr(llvm::Attribute::NoInline); 3135 return Fn; 3136 } 3137 3138 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 3139 if (!S.getAssociatedStmt()) { 3140 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) 3141 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); 3142 return; 3143 } 3144 auto *C = S.getSingleClause<OMPSIMDClause>(); 3145 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, 3146 PrePostActionTy &Action) { 3147 if (C) { 3148 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 3149 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3150 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3151 auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); 3152 CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); 3153 } else { 3154 Action.Enter(CGF); 3155 CGF.EmitStmt( 3156 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3157 } 3158 }; 3159 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3160 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); 3161 } 3162 3163 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 3164 QualType SrcType, QualType DestType, 3165 SourceLocation Loc) { 3166 assert(CGF.hasScalarEvaluationKind(DestType) && 3167 "DestType must have scalar evaluation kind."); 3168 assert(!Val.isAggregate() && "Must be a scalar or complex."); 3169 return Val.isScalar() 3170 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, 3171 Loc) 3172 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 3173 DestType, Loc); 3174 } 3175 3176 static CodeGenFunction::ComplexPairTy 3177 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 3178 QualType DestType, SourceLocation Loc) { 3179 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 3180 "DestType must have complex evaluation kind."); 3181 CodeGenFunction::ComplexPairTy ComplexVal; 3182 if (Val.isScalar()) { 3183 // Convert the input element to the element type of the complex. 
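// E.g. when a float is stored into a _Complex double, the converted scalar
// becomes the real part and the imaginary part is zero-filled below.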
3184 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 3185 auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 3186 DestElementType, Loc); 3187 ComplexVal = CodeGenFunction::ComplexPairTy( 3188 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 3189 } else { 3190 assert(Val.isComplex() && "Must be a scalar or complex."); 3191 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 3192 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 3193 ComplexVal.first = CGF.EmitScalarConversion( 3194 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 3195 ComplexVal.second = CGF.EmitScalarConversion( 3196 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 3197 } 3198 return ComplexVal; 3199 } 3200 3201 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 3202 LValue LVal, RValue RVal) { 3203 if (LVal.isGlobalReg()) { 3204 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 3205 } else { 3206 CGF.EmitAtomicStore(RVal, LVal, 3207 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3208 : llvm::AtomicOrdering::Monotonic, 3209 LVal.isVolatile(), /*IsInit=*/false); 3210 } 3211 } 3212 3213 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, 3214 QualType RValTy, SourceLocation Loc) { 3215 switch (getEvaluationKind(LVal.getType())) { 3216 case TEK_Scalar: 3217 EmitStoreThroughLValue(RValue::get(convertToScalarValue( 3218 *this, RVal, RValTy, LVal.getType(), Loc)), 3219 LVal); 3220 break; 3221 case TEK_Complex: 3222 EmitStoreOfComplex( 3223 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, 3224 /*isInit=*/false); 3225 break; 3226 case TEK_Aggregate: 3227 llvm_unreachable("Must be a scalar or complex."); 3228 } 3229 } 3230 3231 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 3232 const Expr *X, const Expr *V, 3233 SourceLocation Loc) { 3234 // v = x; 3235 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 3236 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 3237 LValue XLValue = CGF.EmitLValue(X); 3238 LValue VLValue = CGF.EmitLValue(V); 3239 RValue Res = XLValue.isGlobalReg() 3240 ? CGF.EmitLoadOfLValue(XLValue, Loc) 3241 : CGF.EmitAtomicLoad( 3242 XLValue, Loc, 3243 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3244 : llvm::AtomicOrdering::Monotonic, 3245 XLValue.isVolatile()); 3246 // OpenMP, 2.12.6, atomic Construct 3247 // Any atomic construct with a seq_cst clause forces the atomically 3248 // performed operation to include an implicit flush operation without a 3249 // list. 3250 if (IsSeqCst) 3251 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3252 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); 3253 } 3254 3255 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 3256 const Expr *X, const Expr *E, 3257 SourceLocation Loc) { 3258 // x = expr; 3259 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 3260 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); 3261 // OpenMP, 2.12.6, atomic Construct 3262 // Any atomic construct with a seq_cst clause forces the atomically 3263 // performed operation to include an implicit flush operation without a 3264 // list. 
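// E.g. '#pragma omp atomic write seq_cst' applied to 'x = expr;' becomes a
// sequentially consistent atomic store followed by a flush without a list
// (which the runtime is expected to lower to a __kmpc_flush call).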
3265 if (IsSeqCst)
3266 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3267 }
3268
3269 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
3270 RValue Update,
3271 BinaryOperatorKind BO,
3272 llvm::AtomicOrdering AO,
3273 bool IsXLHSInRHSPart) {
3274 auto &Context = CGF.CGM.getContext();
3275 // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
3276 // for the 'x' expression is simple, and atomics are supported for the given
3277 // type on the target platform.
3278 if (BO == BO_Comma || !Update.isScalar() ||
3279 !Update.getScalarVal()->getType()->isIntegerTy() ||
3280 !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
3281 (Update.getScalarVal()->getType() !=
3282 X.getAddress().getElementType())) ||
3283 !X.getAddress().getElementType()->isIntegerTy() ||
3284 !Context.getTargetInfo().hasBuiltinAtomic(
3285 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
3286 return std::make_pair(false, RValue::get(nullptr));
3287
3288 llvm::AtomicRMWInst::BinOp RMWOp;
3289 switch (BO) {
3290 case BO_Add:
3291 RMWOp = llvm::AtomicRMWInst::Add;
3292 break;
3293 case BO_Sub:
3294 if (!IsXLHSInRHSPart)
3295 return std::make_pair(false, RValue::get(nullptr));
3296 RMWOp = llvm::AtomicRMWInst::Sub;
3297 break;
3298 case BO_And:
3299 RMWOp = llvm::AtomicRMWInst::And;
3300 break;
3301 case BO_Or:
3302 RMWOp = llvm::AtomicRMWInst::Or;
3303 break;
3304 case BO_Xor:
3305 RMWOp = llvm::AtomicRMWInst::Xor;
3306 break;
3307 case BO_LT:
3308 RMWOp = X.getType()->hasSignedIntegerRepresentation()
3309 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
3310 : llvm::AtomicRMWInst::Max)
3311 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
3312 : llvm::AtomicRMWInst::UMax);
3313 break;
3314 case BO_GT:
3315 RMWOp = X.getType()->hasSignedIntegerRepresentation()
3316 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
3317 : llvm::AtomicRMWInst::Min)
3318 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
3319 : llvm::AtomicRMWInst::UMin);
3320 break;
3321 case BO_Assign:
3322 RMWOp = llvm::AtomicRMWInst::Xchg;
3323 break;
3324 case BO_Mul:
3325 case BO_Div:
3326 case BO_Rem:
3327 case BO_Shl:
3328 case BO_Shr:
3329 case BO_LAnd:
3330 case BO_LOr:
3331 return std::make_pair(false, RValue::get(nullptr));
3332 case BO_PtrMemD:
3333 case BO_PtrMemI:
3334 case BO_LE:
3335 case BO_GE:
3336 case BO_EQ:
3337 case BO_NE:
3338 case BO_AddAssign:
3339 case BO_SubAssign:
3340 case BO_AndAssign:
3341 case BO_OrAssign:
3342 case BO_XorAssign:
3343 case BO_MulAssign:
3344 case BO_DivAssign:
3345 case BO_RemAssign:
3346 case BO_ShlAssign:
3347 case BO_ShrAssign:
3348 case BO_Comma:
3349 llvm_unreachable("Unsupported atomic update operation");
3350 }
3351 auto *UpdateVal = Update.getScalarVal();
3352 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3353 UpdateVal = CGF.Builder.CreateIntCast(
3354 IC, X.getAddress().getElementType(),
3355 X.getType()->hasSignedIntegerRepresentation());
3356 }
3357 auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3358 return std::make_pair(true, RValue::get(Res));
3359 }
3360
3361 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3362 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3363 llvm::AtomicOrdering AO, SourceLocation Loc,
3364 const llvm::function_ref<RValue(RValue)> &CommonGen) {
3365 // Update expressions are allowed to have the following forms:
3366 // x binop= expr; -> xrval binop expr;
3367 // x++, ++x -> xrval + 1;
3368 // x--, --x -> xrval - 1;
3369 // x = x binop expr; -> xrval binop expr
3370 // x = expr Op x; -> expr binop xrval;
3371 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3372 if (!Res.first) {
3373 if (X.isGlobalReg()) {
3374 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3375 // 'xrval'.
3376 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3377 } else {
3378 // Perform compare-and-swap procedure.
3379 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3380 }
3381 }
3382 return Res;
3383 }
3384
3385 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3386 const Expr *X, const Expr *E,
3387 const Expr *UE, bool IsXLHSInRHSPart,
3388 SourceLocation Loc) {
3389 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3390 "Update expr in 'atomic update' must be a binary operator.");
3391 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3392 // Update expressions are allowed to have the following forms:
3393 // x binop= expr; -> xrval binop expr;
3394 // x++, ++x -> xrval + 1;
3395 // x--, --x -> xrval - 1;
3396 // x = x binop expr; -> xrval binop expr
3397 // x = expr Op x; -> expr binop xrval;
3398 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3399 LValue XLValue = CGF.EmitLValue(X);
3400 RValue ExprRValue = CGF.EmitAnyExpr(E);
3401 auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3402 : llvm::AtomicOrdering::Monotonic;
3403 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3404 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3405 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3406 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3407 auto Gen =
3408 [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3409 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3410 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3411 return CGF.EmitAnyExpr(UE);
3412 };
3413 (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3414 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3415 // OpenMP, 2.12.6, atomic Construct
3416 // Any atomic construct with a seq_cst clause forces the atomically
3417 // performed operation to include an implicit flush operation without a
3418 // list.
3419 if (IsSeqCst)
3420 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3421 }
3422
3423 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3424 QualType SourceType, QualType ResType,
3425 SourceLocation Loc) {
3426 switch (CGF.getEvaluationKind(ResType)) {
3427 case TEK_Scalar:
3428 return RValue::get(
3429 convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3430 case TEK_Complex: {
3431 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3432 return RValue::getComplex(Res.first, Res.second);
3433 }
3434 case TEK_Aggregate:
3435 break;
3436 }
3437 llvm_unreachable("Must be a scalar or complex.");
3438 }
3439
3440 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
3441 bool IsPostfixUpdate, const Expr *V,
3442 const Expr *X, const Expr *E,
3443 const Expr *UE, bool IsXLHSInRHSPart,
3444 SourceLocation Loc) {
3445 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
3446 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
3447 RValue NewVVal;
3448 LValue VLValue = CGF.EmitLValue(V);
3449 LValue XLValue = CGF.EmitLValue(X);
3450 RValue ExprRValue = CGF.EmitAnyExpr(E);
3451 auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3452 : llvm::AtomicOrdering::Monotonic;
3453 QualType NewVValType;
3454 if (UE) {
3455 // 'x' is updated with some additional value.
3456 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3457 "Update expr in 'atomic capture' must be a binary operator.");
3458 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3459 // Update expressions are allowed to have the following forms:
3460 // x binop= expr; -> xrval binop expr;
3461 // x++, ++x -> xrval + 1;
3462 // x--, --x -> xrval - 1;
3463 // x = x binop expr; -> xrval binop expr
3464 // x = expr Op x; -> expr binop xrval;
3465 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3466 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3467 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3468 NewVValType = XRValExpr->getType();
3469 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3470 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
3471 IsPostfixUpdate](RValue XRValue) -> RValue {
3472 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3473 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3474 RValue Res = CGF.EmitAnyExpr(UE);
3475 NewVVal = IsPostfixUpdate ? XRValue : Res;
3476 return Res;
3477 };
3478 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3479 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3480 if (Res.first) {
3481 // 'atomicrmw' instruction was generated.
3482 if (IsPostfixUpdate) {
3483 // Use old value from 'atomicrmw'.
3484 NewVVal = Res.second;
3485 } else {
3486 // 'atomicrmw' does not provide new value, so evaluate it using old
3487 // value of 'x'.
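// E.g. for 'v = ++x;' lowered to 'atomicrmw add', the instruction returns only
// the old value of 'x', so the update expression 'xrval + 1' is re-evaluated
// below over that old value to recover the value that was actually stored.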
3488 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3489 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 3490 NewVVal = CGF.EmitAnyExpr(UE); 3491 } 3492 } 3493 } else { 3494 // 'x' is simply rewritten with some 'expr'. 3495 NewVValType = X->getType().getNonReferenceType(); 3496 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 3497 X->getType().getNonReferenceType(), Loc); 3498 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue { 3499 NewVVal = XRValue; 3500 return ExprRValue; 3501 }; 3502 // Try to perform atomicrmw xchg, otherwise simple exchange. 3503 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3504 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 3505 Loc, Gen); 3506 if (Res.first) { 3507 // 'atomicrmw' instruction was generated. 3508 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 3509 } 3510 } 3511 // Emit post-update store to 'v' of old/new 'x' value. 3512 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); 3513 // OpenMP, 2.12.6, atomic Construct 3514 // Any atomic construct with a seq_cst clause forces the atomically 3515 // performed operation to include an implicit flush operation without a 3516 // list. 3517 if (IsSeqCst) 3518 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3519 } 3520 3521 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 3522 bool IsSeqCst, bool IsPostfixUpdate, 3523 const Expr *X, const Expr *V, const Expr *E, 3524 const Expr *UE, bool IsXLHSInRHSPart, 3525 SourceLocation Loc) { 3526 switch (Kind) { 3527 case OMPC_read: 3528 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 3529 break; 3530 case OMPC_write: 3531 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 3532 break; 3533 case OMPC_unknown: 3534 case OMPC_update: 3535 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 3536 break; 3537 case OMPC_capture: 3538 EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 3539 IsXLHSInRHSPart, Loc); 3540 break; 3541 case OMPC_if: 3542 case OMPC_final: 3543 case OMPC_num_threads: 3544 case OMPC_private: 3545 case OMPC_firstprivate: 3546 case OMPC_lastprivate: 3547 case OMPC_reduction: 3548 case OMPC_task_reduction: 3549 case OMPC_in_reduction: 3550 case OMPC_safelen: 3551 case OMPC_simdlen: 3552 case OMPC_collapse: 3553 case OMPC_default: 3554 case OMPC_seq_cst: 3555 case OMPC_shared: 3556 case OMPC_linear: 3557 case OMPC_aligned: 3558 case OMPC_copyin: 3559 case OMPC_copyprivate: 3560 case OMPC_flush: 3561 case OMPC_proc_bind: 3562 case OMPC_schedule: 3563 case OMPC_ordered: 3564 case OMPC_nowait: 3565 case OMPC_untied: 3566 case OMPC_threadprivate: 3567 case OMPC_depend: 3568 case OMPC_mergeable: 3569 case OMPC_device: 3570 case OMPC_threads: 3571 case OMPC_simd: 3572 case OMPC_map: 3573 case OMPC_num_teams: 3574 case OMPC_thread_limit: 3575 case OMPC_priority: 3576 case OMPC_grainsize: 3577 case OMPC_nogroup: 3578 case OMPC_num_tasks: 3579 case OMPC_hint: 3580 case OMPC_dist_schedule: 3581 case OMPC_defaultmap: 3582 case OMPC_uniform: 3583 case OMPC_to: 3584 case OMPC_from: 3585 case OMPC_use_device_ptr: 3586 case OMPC_is_device_ptr: 3587 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 3588 } 3589 } 3590 3591 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 3592 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); 3593 OpenMPClauseKind Kind = OMPC_unknown; 3594 for (auto *C : S.clauses()) { 3595 // Find first clause (skip seq_cst clause, if it is 
first).
3596 if (C->getClauseKind() != OMPC_seq_cst) {
3597 Kind = C->getClauseKind();
3598 break;
3599 }
3600 }
3601
3602 const auto *CS =
3603 S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
3604 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3605 enterFullExpression(EWC);
3606 }
3607 // Processing for statements under 'atomic capture'.
3608 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3609 for (const auto *C : Compound->body()) {
3610 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3611 enterFullExpression(EWC);
3612 }
3613 }
3614 }
3615
3616 auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3617 PrePostActionTy &) {
3618 CGF.EmitStopPoint(CS);
3619 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3620 S.getV(), S.getExpr(), S.getUpdateExpr(),
3621 S.isXLHSInRHSPart(), S.getLocStart());
3622 };
3623 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3624 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3625 }
3626
3627 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
3628 const OMPExecutableDirective &S,
3629 const RegionCodeGenTy &CodeGen) {
3630 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
3631 CodeGenModule &CGM = CGF.CGM;
3632 const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
3633
3634 llvm::Function *Fn = nullptr;
3635 llvm::Constant *FnID = nullptr;
3636
3637 const Expr *IfCond = nullptr;
3638 // Check for the 'if' clause (at most one may be associated with the target region).
3639 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3640 if (C->getNameModifier() == OMPD_unknown ||
3641 C->getNameModifier() == OMPD_target) {
3642 IfCond = C->getCondition();
3643 break;
3644 }
3645 }
3646
3647 // Check if we have any device clause associated with the directive.
3648 const Expr *Device = nullptr;
3649 if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3650 Device = C->getDevice();
3651 }
3652
3653 // Check if we have an if clause whose conditional always evaluates to false
3654 // or if we do not have any targets specified. If so, the target region is not
3655 // an offload entry point.
3656 bool IsOffloadEntry = true;
3657 if (IfCond) {
3658 bool Val;
3659 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3660 IsOffloadEntry = false;
3661 }
3662 if (CGM.getLangOpts().OMPTargetTriples.empty())
3663 IsOffloadEntry = false;
3664
3665 assert(CGF.CurFuncDecl && "No parent declaration for target region!");
3666 StringRef ParentName;
3667 // In case we have Ctors/Dtors, we use the complete type variant to produce
3668 // the mangling of the device outlined kernel.
3669 if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
3670 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3671 else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
3672 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3673 else
3674 ParentName =
3675 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
3676
3677 // Emit target region as a standalone region.
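// The shape of the emission below is roughly: outline the region into Fn/FnID,
// then have the runtime emit an offloading call (e.g. __tgt_target) guarded by
// the if-clause, with a fallback that invokes the host version of Fn when
// offloading is disabled or fails.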
3678 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, 3679 IsOffloadEntry, CodeGen); 3680 OMPLexicalScope Scope(CGF, S); 3681 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3682 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 3683 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, 3684 CapturedVars); 3685 } 3686 3687 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, 3688 PrePostActionTy &Action) { 3689 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 3690 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3691 CGF.EmitOMPPrivateClause(S, PrivateScope); 3692 (void)PrivateScope.Privatize(); 3693 3694 Action.Enter(CGF); 3695 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3696 } 3697 3698 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, 3699 StringRef ParentName, 3700 const OMPTargetDirective &S) { 3701 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3702 emitTargetRegion(CGF, S, Action); 3703 }; 3704 llvm::Function *Fn; 3705 llvm::Constant *Addr; 3706 // Emit target region as a standalone region. 3707 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3708 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3709 assert(Fn && Addr && "Target device function emission failed."); 3710 } 3711 3712 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 3713 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3714 emitTargetRegion(CGF, S, Action); 3715 }; 3716 emitCommonOMPTargetDirective(*this, S, CodeGen); 3717 } 3718 3719 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, 3720 const OMPExecutableDirective &S, 3721 OpenMPDirectiveKind InnermostKind, 3722 const RegionCodeGenTy &CodeGen) { 3723 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); 3724 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( 3725 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 3726 3727 const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>(); 3728 const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>(); 3729 if (NT || TL) { 3730 Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; 3731 Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; 3732 3733 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, 3734 S.getLocStart()); 3735 } 3736 3737 OMPTeamsScope Scope(CGF, S); 3738 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3739 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3740 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, 3741 CapturedVars); 3742 } 3743 3744 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { 3745 // Emit teams region as a standalone region. 
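// E.g. for '#pragma omp teams num_teams(4) thread_limit(8)', the helper above
// pushes the two bounds to the runtime (roughly __kmpc_push_num_teams) before
// forking the outlined region on the league of teams.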
3746 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3747 OMPPrivateScope PrivateScope(CGF); 3748 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3749 CGF.EmitOMPPrivateClause(S, PrivateScope); 3750 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 3751 (void)PrivateScope.Privatize(); 3752 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3753 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 3754 }; 3755 emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); 3756 emitPostUpdateForReductionClause( 3757 *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 3758 } 3759 3760 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, 3761 const OMPTargetTeamsDirective &S) { 3762 auto *CS = S.getCapturedStmt(OMPD_teams); 3763 Action.Enter(CGF); 3764 auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 3765 // TODO: Add support for clauses. 3766 CGF.EmitStmt(CS->getCapturedStmt()); 3767 }; 3768 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); 3769 } 3770 3771 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 3772 CodeGenModule &CGM, StringRef ParentName, 3773 const OMPTargetTeamsDirective &S) { 3774 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3775 emitTargetTeamsRegion(CGF, Action, S); 3776 }; 3777 llvm::Function *Fn; 3778 llvm::Constant *Addr; 3779 // Emit target region as a standalone region. 3780 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3781 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3782 assert(Fn && Addr && "Target device function emission failed."); 3783 } 3784 3785 void CodeGenFunction::EmitOMPTargetTeamsDirective( 3786 const OMPTargetTeamsDirective &S) { 3787 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3788 emitTargetTeamsRegion(CGF, Action, S); 3789 }; 3790 emitCommonOMPTargetDirective(*this, S, CodeGen); 3791 } 3792 3793 void CodeGenFunction::EmitOMPCancellationPointDirective( 3794 const OMPCancellationPointDirective &S) { 3795 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), 3796 S.getCancelRegion()); 3797 } 3798 3799 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 3800 const Expr *IfCond = nullptr; 3801 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3802 if (C->getNameModifier() == OMPD_unknown || 3803 C->getNameModifier() == OMPD_cancel) { 3804 IfCond = C->getCondition(); 3805 break; 3806 } 3807 } 3808 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, 3809 S.getCancelRegion()); 3810 } 3811 3812 CodeGenFunction::JumpDest 3813 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 3814 if (Kind == OMPD_parallel || Kind == OMPD_task || 3815 Kind == OMPD_target_parallel) 3816 return ReturnBlock; 3817 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 3818 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || 3819 Kind == OMPD_distribute_parallel_for || 3820 Kind == OMPD_target_parallel_for); 3821 return OMPCancelStack.getExitBlock(); 3822 } 3823 3824 void CodeGenFunction::EmitOMPUseDevicePtrClause( 3825 const OMPClause &NC, OMPPrivateScope &PrivateScope, 3826 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { 3827 const auto &C = cast<OMPUseDevicePtrClause>(NC); 3828 auto OrigVarIt = C.varlist_begin(); 3829 auto InitIt = C.inits().begin(); 3830 for (auto PvtVarIt : C.private_copies()) { 3831 auto *OrigVD = 
cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
3832 auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
3833 auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
3834
3835 // In order to identify the right initializer we need to match the
3836 // declaration used by the mapping logic. In some cases we may get
3837 // OMPCapturedExprDecl that refers to the original declaration.
3838 const ValueDecl *MatchingVD = OrigVD;
3839 if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
3840 // OMPCapturedExprDecls are used to privatize fields of the current
3841 // structure.
3842 auto *ME = cast<MemberExpr>(OED->getInit());
3843 assert(isa<CXXThisExpr>(ME->getBase()) &&
3844 "Base should be the current struct!");
3845 MatchingVD = ME->getMemberDecl();
3846 }
3847
3848 // If we don't have information about the current list item, move on to
3849 // the next one.
3850 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
3851 if (InitAddrIt == CaptureDeviceAddrMap.end())
3852 continue;
3853
3854 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
3855 // Initialize the temporary initialization variable with the address we
3856 // get from the runtime library. We have to cast the source address
3857 // because it is always a void *. References are materialized in the
3858 // privatization scope, so the initialization here disregards the fact
3859 // the original variable is a reference.
3860 QualType AddrQTy =
3861 getContext().getPointerType(OrigVD->getType().getNonReferenceType());
3862 llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
3863 Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
3864 setAddrOfLocalVar(InitVD, InitAddr);
3865
3866 // Emit the private declaration; it will be initialized through the
3867 // initialization variable we just added to the local declarations map.
3868 EmitDecl(*PvtVD);
3869
3870 // The initialization variable has served its purpose in the emission of
3871 // the previous declaration, so we don't need it anymore.
3872 LocalDeclMap.erase(InitVD);
3873
3874 // Return the address of the private variable.
3875 return GetAddrOfLocalVar(PvtVD);
3876 });
3877 assert(IsRegistered && "firstprivate var already registered as private");
3878 // Silence the warning about unused variable.
3879 (void)IsRegistered;
3880
3881 ++OrigVarIt;
3882 ++InitIt;
3883 }
3884 }
3885
3886 // Generate the instructions for '#pragma omp target data' directive.
3887 void CodeGenFunction::EmitOMPTargetDataDirective(
3888 const OMPTargetDataDirective &S) {
3889 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
3890
3891 // Create a pre/post action to signal the privatization of the device pointer.
3892 // This action can be replaced by the OpenMP runtime code generation to
3893 // deactivate privatization.
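// For example, given
//   #pragma omp target data map(tofrom: p[0:n]) use_device_ptr(p)
//   { foo(p); }
// every use of 'p' inside the region must see the device address of the
// mapped buffer, which is what the privatization machinery below arranges.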
3894 bool PrivatizeDevicePointers = false;
3895 class DevicePointerPrivActionTy : public PrePostActionTy {
3896 bool &PrivatizeDevicePointers;
3897
3898 public:
3899 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
3900 : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
3901 void Enter(CodeGenFunction &CGF) override {
3902 PrivatizeDevicePointers = true;
3903 }
3904 };
3905 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
3906
3907 auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
3908 CodeGenFunction &CGF, PrePostActionTy &Action) {
3909 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3910 CGF.EmitStmt(
3911 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3912 };
3913
3914 // Codegen that selects whether to generate the privatization code or not.
3915 auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
3916 &InnermostCodeGen](CodeGenFunction &CGF,
3917 PrePostActionTy &Action) {
3918 RegionCodeGenTy RCG(InnermostCodeGen);
3919 PrivatizeDevicePointers = false;
3920
3921 // Call the pre-action to change the status of PrivatizeDevicePointers if
3922 // needed.
3923 Action.Enter(CGF);
3924
3925 if (PrivatizeDevicePointers) {
3926 OMPPrivateScope PrivateScope(CGF);
3927 // Emit all instances of the use_device_ptr clause.
3928 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
3929 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
3930 Info.CaptureDeviceAddrMap);
3931 (void)PrivateScope.Privatize();
3932 RCG(CGF);
3933 } else
3934 RCG(CGF);
3935 };
3936
3937 // Forward the provided action to the privatization codegen.
3938 RegionCodeGenTy PrivRCG(PrivCodeGen);
3939 PrivRCG.setAction(Action);
3940
3941 // Although the body of the region is emitted as an inlined directive, we
3942 // don't use an inlined scope, because changes to the references inside the
3943 // region are expected to be visible outside, so we do not privatize them.
3944 OMPLexicalScope Scope(CGF, S);
3945 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
3946 PrivRCG);
3947 };
3948
3949 RegionCodeGenTy RCG(CodeGen);
3950
3951 // If we don't have target devices, don't bother emitting the data mapping
3952 // code.
3953 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
3954 RCG(*this);
3955 return;
3956 }
3957
3958 // Check if we have any if clause associated with the directive.
3959 const Expr *IfCond = nullptr;
3960 if (auto *C = S.getSingleClause<OMPIfClause>())
3961 IfCond = C->getCondition();
3962
3963 // Check if we have any device clause associated with the directive.
3964 const Expr *Device = nullptr;
3965 if (auto *C = S.getSingleClause<OMPDeviceClause>())
3966 Device = C->getDevice();
3967
3968 // Set the action to signal privatization of device pointers.
3969 RCG.setAction(PrivAction);
3970
3971 // Emit region code.
3972 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
3973 Info);
3974 }
3975
3976 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
3977 const OMPTargetEnterDataDirective &S) {
3978 // If we don't have target devices, don't bother emitting the data mapping
3979 // code.
3980 if (CGM.getLangOpts().OMPTargetTriples.empty())
3981 return;
3982
3983 // Check if we have any if clause associated with the directive.
3984 const Expr *IfCond = nullptr;
3985 if (auto *C = S.getSingleClause<OMPIfClause>())
3986 IfCond = C->getCondition();
3987
3988 // Check if we have any device clause associated with the directive.
3989 const Expr *Device = nullptr;
3990 if (auto *C = S.getSingleClause<OMPDeviceClause>())
3991 Device = C->getDevice();
3992
3993 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
3994 }
3995
3996 void CodeGenFunction::EmitOMPTargetExitDataDirective(
3997 const OMPTargetExitDataDirective &S) {
3998 // If we don't have target devices, don't bother emitting the data mapping
3999 // code.
4000 if (CGM.getLangOpts().OMPTargetTriples.empty())
4001 return;
4002
4003 // Check if we have any if clause associated with the directive.
4004 const Expr *IfCond = nullptr;
4005 if (auto *C = S.getSingleClause<OMPIfClause>())
4006 IfCond = C->getCondition();
4007
4008 // Check if we have any device clause associated with the directive.
4009 const Expr *Device = nullptr;
4010 if (auto *C = S.getSingleClause<OMPDeviceClause>())
4011 Device = C->getDevice();
4012
4013 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4014 }
4015
4016 static void emitTargetParallelRegion(CodeGenFunction &CGF,
4017 const OMPTargetParallelDirective &S,
4018 PrePostActionTy &Action) {
4019 // Get the captured statement associated with the 'parallel' region.
4020 auto *CS = S.getCapturedStmt(OMPD_parallel);
4021 Action.Enter(CGF);
4022 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
4023 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4024 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4025 CGF.EmitOMPPrivateClause(S, PrivateScope);
4026 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4027 (void)PrivateScope.Privatize();
4028 // TODO: Add support for clauses.
4029 CGF.EmitStmt(CS->getCapturedStmt());
4030 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4031 };
4032 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
4033 emitEmptyBoundParameters);
4034 emitPostUpdateForReductionClause(
4035 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4036 }
4037
4038 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4039 CodeGenModule &CGM, StringRef ParentName,
4040 const OMPTargetParallelDirective &S) {
4041 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4042 emitTargetParallelRegion(CGF, S, Action);
4043 };
4044 llvm::Function *Fn;
4045 llvm::Constant *Addr;
4046 // Emit target region as a standalone region.
4047 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4048 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4049 assert(Fn && Addr && "Target device function emission failed.");
4050 }
4051
4052 void CodeGenFunction::EmitOMPTargetParallelDirective(
4053 const OMPTargetParallelDirective &S) {
4054 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4055 emitTargetParallelRegion(CGF, S, Action);
4056 };
4057 emitCommonOMPTargetDirective(*this, S, CodeGen);
4058 }
4059
4060 void CodeGenFunction::EmitOMPTargetParallelForDirective(
4061 const OMPTargetParallelForDirective &S) {
4062 // TODO: codegen for target parallel for.
4063 }
4064
4065 /// Map the given helper variable (e.g. a loop bound) to the corresponding captured implicit parameter in the private scope.
4066 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, 4067 const ImplicitParamDecl *PVD, 4068 CodeGenFunction::OMPPrivateScope &Privates) { 4069 auto *VDecl = cast<VarDecl>(Helper->getDecl()); 4070 Privates.addPrivate( 4071 VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); }); 4072 } 4073 4074 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { 4075 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); 4076 // Emit outlined function for task construct. 4077 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 4078 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 4079 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 4080 const Expr *IfCond = nullptr; 4081 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 4082 if (C->getNameModifier() == OMPD_unknown || 4083 C->getNameModifier() == OMPD_taskloop) { 4084 IfCond = C->getCondition(); 4085 break; 4086 } 4087 } 4088 4089 OMPTaskDataTy Data; 4090 // Check if taskloop must be emitted without taskgroup. 4091 Data.Nogroup = S.getSingleClause<OMPNogroupClause>(); 4092 // TODO: Check if we should emit tied or untied task. 4093 Data.Tied = true; 4094 // Set scheduling for taskloop 4095 if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) { 4096 // grainsize clause 4097 Data.Schedule.setInt(/*IntVal=*/false); 4098 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize())); 4099 } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) { 4100 // num_tasks clause 4101 Data.Schedule.setInt(/*IntVal=*/true); 4102 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks())); 4103 } 4104 4105 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { 4106 // if (PreCond) { 4107 // for (IV in 0..LastIteration) BODY; 4108 // <Final counter/linear vars updates>; 4109 // } 4110 // 4111 4112 // Emit: if (PreCond) - begin. 4113 // If the condition constant folds and can be elided, avoid emitting the 4114 // whole loop. 4115 bool CondConstant; 4116 llvm::BasicBlock *ContBlock = nullptr; 4117 OMPLoopScope PreInitScope(CGF, S); 4118 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 4119 if (!CondConstant) 4120 return; 4121 } else { 4122 auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); 4123 ContBlock = CGF.createBasicBlock("taskloop.if.end"); 4124 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 4125 CGF.getProfileCount(&S)); 4126 CGF.EmitBlock(ThenBlock); 4127 CGF.incrementProfileCounter(&S); 4128 } 4129 4130 if (isOpenMPSimdDirective(S.getDirectiveKind())) 4131 CGF.EmitOMPSimdInit(S); 4132 4133 OMPPrivateScope LoopScope(CGF); 4134 // Emit helper vars inits. 4135 enum { LowerBound = 5, UpperBound, Stride, LastIter }; 4136 auto *I = CS->getCapturedDecl()->param_begin(); 4137 auto *LBP = std::next(I, LowerBound); 4138 auto *UBP = std::next(I, UpperBound); 4139 auto *STP = std::next(I, Stride); 4140 auto *LIP = std::next(I, LastIter); 4141 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP, 4142 LoopScope); 4143 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP, 4144 LoopScope); 4145 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope); 4146 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, 4147 LoopScope); 4148 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 4149 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 4150 (void)LoopScope.Privatize(); 4151 // Emit the loop iteration variable. 
4152 const Expr *IVExpr = S.getIterationVariable(); 4153 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 4154 CGF.EmitVarDecl(*IVDecl); 4155 CGF.EmitIgnoredExpr(S.getInit()); 4156 4157 // Emit the iterations count variable. 4158 // If it is not a variable, Sema decided to calculate iterations count on 4159 // each iteration (e.g., it is foldable into a constant). 4160 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 4161 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 4162 // Emit calculation of the iterations count. 4163 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 4164 } 4165 4166 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 4167 S.getInc(), 4168 [&S](CodeGenFunction &CGF) { 4169 CGF.EmitOMPLoopBody(S, JumpDest()); 4170 CGF.EmitStopPoint(&S); 4171 }, 4172 [](CodeGenFunction &) {}); 4173 // Emit: if (PreCond) - end. 4174 if (ContBlock) { 4175 CGF.EmitBranch(ContBlock); 4176 CGF.EmitBlock(ContBlock, true); 4177 } 4178 // Emit final copy of the lastprivate variables if IsLastIter != 0. 4179 if (HasLastprivateClause) { 4180 CGF.EmitOMPLastprivateClauseFinal( 4181 S, isOpenMPSimdDirective(S.getDirectiveKind()), 4182 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( 4183 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 4184 (*LIP)->getType(), S.getLocStart()))); 4185 } 4186 }; 4187 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 4188 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 4189 const OMPTaskDataTy &Data) { 4190 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) { 4191 OMPLoopScope PreInitScope(CGF, S); 4192 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S, 4193 OutlinedFn, SharedsTy, 4194 CapturedStruct, IfCond, Data); 4195 }; 4196 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, 4197 CodeGen); 4198 }; 4199 if (Data.Nogroup) 4200 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 4201 else { 4202 CGM.getOpenMPRuntime().emitTaskgroupRegion( 4203 *this, 4204 [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, 4205 PrePostActionTy &Action) { 4206 Action.Enter(CGF); 4207 CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 4208 }, 4209 S.getLocStart()); 4210 } 4211 } 4212 4213 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 4214 EmitOMPTaskLoopBasedDirective(S); 4215 } 4216 4217 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 4218 const OMPTaskLoopSimdDirective &S) { 4219 EmitOMPTaskLoopBasedDirective(S); 4220 } 4221 4222 // Generate the instructions for '#pragma omp target update' directive. 4223 void CodeGenFunction::EmitOMPTargetUpdateDirective( 4224 const OMPTargetUpdateDirective &S) { 4225 // If we don't have target devices, don't bother emitting the data mapping 4226 // code. 4227 if (CGM.getLangOpts().OMPTargetTriples.empty()) 4228 return; 4229 4230 // Check if we have any if clause associated with the directive. 4231 const Expr *IfCond = nullptr; 4232 if (auto *C = S.getSingleClause<OMPIfClause>()) 4233 IfCond = C->getCondition(); 4234 4235 // Check if we have any device clause associated with the directive. 4236 const Expr *Device = nullptr; 4237 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 4238 Device = C->getDevice(); 4239 4240 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 4241 } 4242
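// For illustration, the three standalone data directives above (enter data,
// exit data, update) all funnel into emitTargetDataStandAloneCall; e.g.
//   #pragma omp target update from(a) if(cond) device(dev)
// is expected to become a data-update runtime call (roughly
// __tgt_target_data_update) guarded by 'cond' on device 'dev', and is elided
// entirely when no offload targets are configured.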