//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct code
/// generation for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                  bool AsInlined = false, bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (AsInlined) {
      if (S.hasAssociatedStmt()) {
        auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
        for (auto &C : CS->captures()) {
          if (C.capturesVariable() || C.capturesVariableByCopy()) {
            auto *VD = C.getCapturedVar();
            DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                            isCapturedVar(CGF, VD) ||
                                (CGF.CapturedStmtInfo &&
                                 InlinedShareds.isGlobalVarCaptured(VD)),
                            VD->getType().getNonReferenceType(), VK_LValue,
                            SourceLocation());
            InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
              return CGF.EmitLValue(&DRE).getAddress();
            });
          }
        }
        (void)InlinedShareds.Privatize();
      }
    }
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct code
/// generation for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct code
/// generation for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of expressions used in the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    // S is already an OMPLoopDirective, so no dyn_cast is needed here.
    if (auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

} // namespace

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV =
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), SourceLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
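        // Net effect of the store/load pair above and below (pseudo-C, names
        // illustrative only):
        //   uintptr_t slot;               // DstAddr, "<var>.casted"
        //   *(FieldTy *)&slot = CV;       // store through the source type
        //   CV = slot;                    // reload as a uintptr-sized value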
        CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), SourceLocation());
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references, we need to return the address of the
  // reference instead of the address of the referenced value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
  }

  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType()) {
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  }
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains the data required for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only cast arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly,
                           StringRef FunctionName)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library only deals
    // with pointers. VLA type sizes are passed to the outlined function in the
    // same way.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType()) {
      if (FO.UIntPtrCastRequired)
        ArgType = Ctx.getUIntPtrType();
    }

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &Ctx.Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType());
    auto *Arg =
        ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II,
                                  ArgType, ImplicitParamDecl::Other);
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit the function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, CD->getLocation(),
                    CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    // Do not map arguments if we emit the function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CGF.CreateMemTemp(
            CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
        CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
                              /*Volatile=*/false, CurVD->getType());
        LocalAddr = RefAddr;
      }
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
    LValue ArgLVal =
        CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), BaseInfo);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
                                                          Args[Cnt]->getName(),
                                                          ArgLVal),
                                     FD->getType(), BaseInfo);
      }
      auto *ExprArg =
          CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = CGF.EmitLoadOfReference(
              ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
        } else if (!VarTy->isVariablyModifiedType() ||
                   !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = CGF.EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      LocalAddrs.insert(
          {Args[Cnt],
           {Var,
            FO.UIntPtrCastRequired
                ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
                                       ArgLVal, VarTy->isReferenceType())
                : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
                         .getScalarVal();
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() &&
      CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str());
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      setAddrOfLocalVar(LocalAddrPair.second.first,
                        LocalAddrPair.second.second);
    }
  }
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName());
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.disableDebugInfo();
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV =
          WrapperCGF.MakeAddrLValue(I->second.second, Arg->getType(), BaseInfo);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end())
        CallArg = EI->second.second;
      else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(), BaseInfo);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
      }
    }
    CallArgs.emplace_back(CallArg);
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
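  // A sketch of the IR emitted below (labels and value names are illustrative,
  // not the exact output):
  //
  //   entry:
  //     %isempty = icmp eq %T* %dest.begin, %dest.end
  //     br i1 %isempty, label %done, label %body
  //   body:
  //     %dest = phi %T* [ %dest.begin, %entry ], [ %dest.next, %body ]
  //     %src  = phi %T* [ %src.begin,  %entry ], [ %src.next,  %body ]
  //     ; CopyGen copies one element from %src to %dest
  //     %dest.next = getelementptr %T, %T* %dest, i32 1
  //     %src.next  = getelementptr %T, %T* %src,  i32 1
  //     %done.cmp  = icmp eq %T* %dest.next, %dest.end
  //     br i1 %done.cmp, label %done, label %body
  //   done: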
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform a simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element-by-element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // We are working with a single array element, so we have to remap
            // the destination and source variables to the corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *CapFD = CapturesInfo.lookup(OrigVD);
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform a simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for a single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap the temp VDInit variable to the address of the original
            // variable (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If it
          // is, no copy is needed.
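          // The test below is a simple pointer comparison: in the master
          // thread the threadprivate copy and the master copy share an
          // address, so the copy region is skipped there. Roughly (labels
          // illustrative):
          //   if ((uintptr_t)&master_var != (uintptr_t)&private_var) {
          //     /* copyin.not.master: copy each threadprivate var */
          //   } /* copyin.not.master.end */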
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of the copying procedure for the non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    // Taskloops do not require additional initialization; it is done in the
    // runtime support library.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for the future update at the
      // end of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [&]() -> Address {
                // Emit private VarDecl with copy init.
                EmitDecl(*VD);
                return GetAddrOfLocalVar(VD);
              });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable of a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const auto *IRef : Shareds) {
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    auto Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count),
        [BaseAddr]() -> Address { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    if (isa<OMPArraySectionExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
        return GetAddrOfLocalVar(PrivateVD);
      });
    } else if (isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
        return Builder.CreateElementBitCast(
            GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(
          LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      D.getDirectiveKind() == OMPD_simd;
    bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
    // Emit a nowait reduction if the nowait clause is present or the directive
    // is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (auto *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // When the first post-update expression is found, emit the
          // conditional block if one was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to the
/// outlined parallel function. This is necessary for combined constructs such
/// as 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace

static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit the parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize the threads and avoid data
      // races when propagating the master thread's values of threadprivate
      // variables to the local instances in all other implicit threads.
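      // This lowers to a plain runtime barrier (the "__kmpc_barrier(&loc,
      // global_tid)" call sketched in EmitOMPCopyinClause above);
      // ForceSimpleCall requests exactly that simple form, with no
      // cancellation checks.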
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values on the current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *U : C->updates())
      EmitIgnoredExpr(U);
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit the loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit the condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return;
  // Emit inits for the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit the calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
}

void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // When the first post-update expression is found, emit the
          // conditional block if one was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined default
        // alignments for SIMD instructions on the target platforms are assumed.
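        // The default below comes from the target via
        // getOpenMPDefaultSimdAlign; e.g. on x86 this is typically the widest
        // available vector register width in bytes (illustrative only, the
        // value is entirely target-defined).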
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    (void)LoopScope.addPrivate(VD, [&]() -> Address {
      // Emit the var without initialization.
      if (!LocalDeclMap.count(PrivateVD)) {
        auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
        EmitAutoVarCleanups(VarEmission);
      }
      DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      (*I)->getType(), VK_LValue, (*I)->getExprLoc());
      return EmitLValue(&DRE).getAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    }
    ++I;
  }
}

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get the initial values of the real counters.
    for (auto I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Check that the loop is executed at least once.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else
        EmitVarDecl(*PrivateVD);
      ++CurPrivate;
    }
  }
}

static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(false);
  }
}

void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}

void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // When the first post-update expression is found, emit the
          // conditional block if one was requested.
1474 auto *ThenBB = createBasicBlock(".omp.final.then"); 1475 DoneBB = createBasicBlock(".omp.final.done"); 1476 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1477 EmitBlock(ThenBB); 1478 } 1479 } 1480 Address OrigAddr = Address::invalid(); 1481 if (CED) 1482 OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); 1483 else { 1484 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), 1485 /*RefersToEnclosingVariableOrCapture=*/false, 1486 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); 1487 OrigAddr = EmitLValue(&DRE).getAddress(); 1488 } 1489 OMPPrivateScope VarScope(*this); 1490 VarScope.addPrivate(OrigVD, 1491 [OrigAddr]() -> Address { return OrigAddr; }); 1492 (void)VarScope.Privatize(); 1493 EmitIgnoredExpr(F); 1494 } 1495 ++IC; 1496 ++IPC; 1497 } 1498 if (DoneBB) 1499 EmitBlock(DoneBB, /*IsFinished=*/true); 1500 } 1501 1502 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, 1503 const OMPLoopDirective &S, 1504 CodeGenFunction::JumpDest LoopExit) { 1505 CGF.EmitOMPLoopBody(S, LoopExit); 1506 CGF.EmitStopPoint(&S); 1507 } 1508 1509 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 1510 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1511 OMPLoopScope PreInitScope(CGF, S); 1512 // if (PreCond) { 1513 // for (IV in 0..LastIteration) BODY; 1514 // <Final counter/linear vars updates>; 1515 // } 1516 // 1517 1518 // Emit: if (PreCond) - begin. 1519 // If the condition constant folds and can be elided, avoid emitting the 1520 // whole loop. 1521 bool CondConstant; 1522 llvm::BasicBlock *ContBlock = nullptr; 1523 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1524 if (!CondConstant) 1525 return; 1526 } else { 1527 auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); 1528 ContBlock = CGF.createBasicBlock("simd.if.end"); 1529 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 1530 CGF.getProfileCount(&S)); 1531 CGF.EmitBlock(ThenBlock); 1532 CGF.incrementProfileCounter(&S); 1533 } 1534 1535 // Emit the loop iteration variable. 1536 const Expr *IVExpr = S.getIterationVariable(); 1537 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 1538 CGF.EmitVarDecl(*IVDecl); 1539 CGF.EmitIgnoredExpr(S.getInit()); 1540 1541 // Emit the iterations count variable. 1542 // If it is not a variable, Sema decided to calculate iterations count on 1543 // each iteration (e.g., it is foldable into a constant). 1544 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1545 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1546 // Emit calculation of the iterations count. 1547 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 1548 } 1549 1550 CGF.EmitOMPSimdInit(S); 1551 1552 emitAlignedClause(CGF, S); 1553 CGF.EmitOMPLinearClauseInit(S); 1554 { 1555 OMPPrivateScope LoopScope(CGF); 1556 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 1557 CGF.EmitOMPLinearClause(S, LoopScope); 1558 CGF.EmitOMPPrivateClause(S, LoopScope); 1559 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1560 bool HasLastprivateClause = 1561 CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1562 (void)LoopScope.Privatize(); 1563 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1564 S.getInc(), 1565 [&S](CodeGenFunction &CGF) { 1566 CGF.EmitOMPLoopBody(S, JumpDest()); 1567 CGF.EmitStopPoint(&S); 1568 }, 1569 [](CodeGenFunction &) {}); 1570 CGF.EmitOMPSimdFinal( 1571 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1572 // Emit final copy of the lastprivate variables at the end of loops. 
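// (A note on the call below: NoFinals is true here because the final
// values of the loop counters were already emitted by EmitOMPSimdFinal
// above, so only the copy-back of the lastprivate variables remains.)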
1573 if (HasLastprivateClause) 1574 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 1575 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); 1576 emitPostUpdateForReductionClause( 1577 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1578 } 1579 CGF.EmitOMPLinearClauseFinal( 1580 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1581 // Emit: if (PreCond) - end. 1582 if (ContBlock) { 1583 CGF.EmitBranch(ContBlock); 1584 CGF.EmitBlock(ContBlock, true); 1585 } 1586 }; 1587 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1588 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1589 } 1590 1591 void CodeGenFunction::EmitOMPOuterLoop( 1592 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 1593 CodeGenFunction::OMPPrivateScope &LoopScope, 1594 const CodeGenFunction::OMPLoopArguments &LoopArgs, 1595 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 1596 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { 1597 auto &RT = CGM.getOpenMPRuntime(); 1598 1599 const Expr *IVExpr = S.getIterationVariable(); 1600 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1601 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1602 1603 auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 1604 1605 // Start the loop with a block that tests the condition. 1606 auto CondBlock = createBasicBlock("omp.dispatch.cond"); 1607 EmitBlock(CondBlock); 1608 const SourceRange &R = S.getSourceRange(); 1609 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1610 SourceLocToDebugLoc(R.getEnd())); 1611 1612 llvm::Value *BoolCondVal = nullptr; 1613 if (!DynamicOrOrdered) { 1614 // UB = min(UB, GlobalUB) or 1615 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 1616 // 'distribute parallel for') 1617 EmitIgnoredExpr(LoopArgs.EUB); 1618 // IV = LB 1619 EmitIgnoredExpr(LoopArgs.Init); 1620 // IV < UB 1621 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); 1622 } else { 1623 BoolCondVal = 1624 RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL, 1625 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); 1626 } 1627 1628 // If there are any cleanups between here and the loop-exit scope, 1629 // create a block to stage a loop exit along. 1630 auto ExitBlock = LoopExit.getBlock(); 1631 if (LoopScope.requiresCleanups()) 1632 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 1633 1634 auto LoopBody = createBasicBlock("omp.dispatch.body"); 1635 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 1636 if (ExitBlock != LoopExit.getBlock()) { 1637 EmitBlock(ExitBlock); 1638 EmitBranchThroughCleanup(LoopExit); 1639 } 1640 EmitBlock(LoopBody); 1641 1642 // Emit "IV = LB" (in case of static schedule, we have already calculated new 1643 // LB for loop condition and emitted it above). 1644 if (DynamicOrOrdered) 1645 EmitIgnoredExpr(LoopArgs.Init); 1646 1647 // Create a block for the increment. 1648 auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 1649 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1650 1651 // Generate !llvm.loop.parallel metadata for loads and stores for loops 1652 // with dynamic/guided scheduling and without ordered clause. 
1653 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 1654 LoopStack.setParallel(!IsMonotonic); 1655 else 1656 EmitOMPSimdInit(S, IsMonotonic); 1657 1658 SourceLocation Loc = S.getLocStart(); 1659 1660 // when 'distribute' is not combined with a 'for': 1661 // while (idx <= UB) { BODY; ++idx; } 1662 // when 'distribute' is combined with a 'for' 1663 // (e.g. 'distribute parallel for') 1664 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 1665 EmitOMPInnerLoop( 1666 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, 1667 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 1668 CodeGenLoop(CGF, S, LoopExit); 1669 }, 1670 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { 1671 CodeGenOrdered(CGF, Loc, IVSize, IVSigned); 1672 }); 1673 1674 EmitBlock(Continue.getBlock()); 1675 BreakContinueStack.pop_back(); 1676 if (!DynamicOrOrdered) { 1677 // Emit "LB = LB + Stride", "UB = UB + Stride". 1678 EmitIgnoredExpr(LoopArgs.NextLB); 1679 EmitIgnoredExpr(LoopArgs.NextUB); 1680 } 1681 1682 EmitBranch(CondBlock); 1683 LoopStack.pop(); 1684 // Emit the fall-through block. 1685 EmitBlock(LoopExit.getBlock()); 1686 1687 // Tell the runtime we are done. 1688 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { 1689 if (!DynamicOrOrdered) 1690 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 1691 }; 1692 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 1693 } 1694 1695 void CodeGenFunction::EmitOMPForOuterLoop( 1696 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 1697 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1698 const OMPLoopArguments &LoopArgs, 1699 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 1700 auto &RT = CGM.getOpenMPRuntime(); 1701 1702 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 1703 const bool DynamicOrOrdered = 1704 Ordered || RT.isDynamic(ScheduleKind.Schedule); 1705 1706 assert((Ordered || 1707 !RT.isStaticNonchunked(ScheduleKind.Schedule, 1708 LoopArgs.Chunk != nullptr)) && 1709 "static non-chunked schedule does not need outer loop"); 1710 1711 // Emit outer loop. 1712 // 1713 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1714 // When schedule(dynamic,chunk_size) is specified, the iterations are 1715 // distributed to threads in the team in chunks as the threads request them. 1716 // Each thread executes a chunk of iterations, then requests another chunk, 1717 // until no chunks remain to be distributed. Each chunk contains chunk_size 1718 // iterations, except for the last chunk to be distributed, which may have 1719 // fewer iterations. When no chunk_size is specified, it defaults to 1. 1720 // 1721 // When schedule(guided,chunk_size) is specified, the iterations are assigned 1722 // to threads in the team in chunks as the executing threads request them. 1723 // Each thread executes a chunk of iterations, then requests another chunk, 1724 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 1725 // each chunk is proportional to the number of unassigned iterations divided 1726 // by the number of threads in the team, decreasing to 1. For a chunk_size 1727 // with value k (greater than 1), the size of each chunk is determined in the 1728 // same way, with the restriction that the chunks do not contain fewer than k 1729 // iterations (except for the last chunk to be assigned, which may have fewer 1730 // than k iterations). 
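// (A worked illustration of the guided rule above, assuming the
// implementation picks chunk = ceil(remaining / nthreads): with 100
// iterations, 4 threads and chunk_size 1, successive chunk sizes would
// be 25, 19, 14, 11, 8, 6, 5, ..., decreasing to 1. The spec only
// requires proportionality, so an actual runtime may choose differently.)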
//
1732 // When schedule(auto) is specified, the decision regarding scheduling is
1733 // delegated to the compiler and/or runtime system. The programmer gives the
1734 // implementation the freedom to choose any possible mapping of iterations to
1735 // threads in the team.
1736 //
1737 // When schedule(runtime) is specified, the decision regarding scheduling is
1738 // deferred until run time, and the schedule and chunk size are taken from the
1739 // run-sched-var ICV. If the ICV is set to auto, the schedule is
1740 // implementation defined.
1741 //
1742 // while(__kmpc_dispatch_next(&LB, &UB)) {
1743 //   idx = LB;
1744 //   while (idx <= UB) { BODY; ++idx;
1745 //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1746 //   } // inner loop
1747 // }
1748 //
1749 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1750 // When schedule(static, chunk_size) is specified, iterations are divided into
1751 // chunks of size chunk_size, and the chunks are assigned to the threads in
1752 // the team in a round-robin fashion in the order of the thread number.
1753 //
1754 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1755 //   while (idx <= UB) { BODY; ++idx; } // inner loop
1756 //   LB = LB + ST;
1757 //   UB = UB + ST;
1758 // }
1759 //
1760
1761 const Expr *IVExpr = S.getIterationVariable();
1762 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1763 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1764
1765 if (DynamicOrOrdered) {
1766 auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1767 llvm::Value *LBVal = DispatchBounds.first;
1768 llvm::Value *UBVal = DispatchBounds.second;
1769 CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
1770 LoopArgs.Chunk};
1771 RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1772 IVSigned, Ordered, DispatchRTInputValues);
1773 } else {
1774 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
1775 Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1776 LoopArgs.ST, LoopArgs.Chunk);
1777 }
1778
1779 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1780 const unsigned IVSize,
1781 const bool IVSigned) {
1782 if (Ordered) {
1783 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1784 IVSigned);
1785 }
1786 };
1787
1788 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1789 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1790 OuterLoopArgs.IncExpr = S.getInc();
1791 OuterLoopArgs.Init = S.getInit();
1792 OuterLoopArgs.Cond = S.getCond();
1793 OuterLoopArgs.NextLB = S.getNextLowerBound();
1794 OuterLoopArgs.NextUB = S.getNextUpperBound();
1795 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1796 emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1797 }
1798
1799 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
1800 const unsigned IVSize, const bool IVSigned) {}
1801
1802 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1803 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1804 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1805 const CodeGenLoopTy &CodeGenLoopContent) {
1806
1807 auto &RT = CGM.getOpenMPRuntime();
1808
1809 // Emit outer loop.
1810 // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
1811 // dynamic.
1812 //
1813
1814 const Expr *IVExpr = S.getIterationVariable();
1815 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1816 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1817
1818 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1819 IVSigned, /* Ordered = */ false, LoopArgs.IL,
1820 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1821 LoopArgs.Chunk);
1822
1823 // For combined 'distribute' and 'for' constructs, the increment expression
1824 // of 'distribute' is stored in DistInc. For 'distribute' alone, it is in Inc.
1825 Expr *IncExpr;
1826 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1827 IncExpr = S.getDistInc();
1828 else
1829 IncExpr = S.getInc();
1830
1831 // This routine is shared by 'omp distribute parallel for' and
1832 // 'omp distribute': select the right EUB expression depending on the
1833 // directive.
1834 OMPLoopArguments OuterLoopArgs;
1835 OuterLoopArgs.LB = LoopArgs.LB;
1836 OuterLoopArgs.UB = LoopArgs.UB;
1837 OuterLoopArgs.ST = LoopArgs.ST;
1838 OuterLoopArgs.IL = LoopArgs.IL;
1839 OuterLoopArgs.Chunk = LoopArgs.Chunk;
1840 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1841 ? S.getCombinedEnsureUpperBound()
1842 : S.getEnsureUpperBound();
1843 OuterLoopArgs.IncExpr = IncExpr;
1844 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1845 ? S.getCombinedInit()
1846 : S.getInit();
1847 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1848 ? S.getCombinedCond()
1849 : S.getCond();
1850 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1851 ? S.getCombinedNextLowerBound()
1852 : S.getNextLowerBound();
1853 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1854 ? S.getCombinedNextUpperBound()
1855 : S.getNextUpperBound();
1856
1857 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
1858 LoopScope, OuterLoopArgs, CodeGenLoopContent,
1859 emitEmptyOrdered);
1860 }
1861
1862 /// Emit a helper variable and return the corresponding lvalue.
1863 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1864 const DeclRefExpr *Helper) {
1865 auto VDecl = cast<VarDecl>(Helper->getDecl());
1866 CGF.EmitVarDecl(*VDecl);
1867 return CGF.EmitLValue(Helper);
1868 }
1869
1870 static std::pair<LValue, LValue>
1871 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
1872 const OMPExecutableDirective &S) {
1873 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1874 LValue LB =
1875 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
1876 LValue UB =
1877 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
1878
1879 // When composing 'distribute' with 'for' (e.g. as in 'distribute
1880 // parallel for') we need to use the 'distribute'
1881 // chunk lower and upper bounds rather than the whole loop iteration
1882 // space. These are parameters to the outlined function for 'parallel'
1883 // and we copy the bounds of the previous schedule into
1884 // the current ones.
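// For illustration: given a combined construct such as
//   #pragma omp distribute parallel for
//   for (int i = 0; i < N; ++i) ...
// each team is handed a distribute chunk [PrevLB, PrevUB], and the inner
// 'for' worksharing loop partitions only that subrange. Hence LB and UB
// below are seeded from PrevLB/PrevUB (converted to the iteration
// variable's type) instead of from the whole normalized iteration space.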
1885 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
1886 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
1887 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
1888 PrevLBVal = CGF.EmitScalarConversion(
1889 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
1890 LS.getIterationVariable()->getType(), SourceLocation());
1891 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
1892 PrevUBVal = CGF.EmitScalarConversion(
1893 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
1894 LS.getIterationVariable()->getType(), SourceLocation());
1895
1896 CGF.EmitStoreOfScalar(PrevLBVal, LB);
1897 CGF.EmitStoreOfScalar(PrevUBVal, UB);
1898
1899 return {LB, UB};
1900 }
1901
1902 /// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided), we need
1903 /// to use the LB and UB expressions generated by the worksharing code
1904 /// generation support, whereas in non-combined situations we would just emit
1905 /// 0 and the LastIteration expression.
1906 /// This function is necessary because the LB and UB types differ between the
1907 /// RT emission routines for 'for_static_init' and
1908 /// 'for_dispatch_init'.
1909 static std::pair<llvm::Value *, llvm::Value *>
1910 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
1911 const OMPExecutableDirective &S,
1912 Address LB, Address UB) {
1913 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1914 const Expr *IVExpr = LS.getIterationVariable();
1915 // When implementing a dynamic schedule for a 'for' combined with a
1916 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
1917 // is not normalized, as each team only executes its own assigned
1918 // distribute chunk.
1919 QualType IteratorTy = IVExpr->getType();
1920 llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
1921 SourceLocation());
1922 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
1923 SourceLocation());
1924 return {LBVal, UBVal};
1925 }
1926
1927 static void emitDistributeParallelForDistributeInnerBoundParams(
1928 CodeGenFunction &CGF, const OMPExecutableDirective &S,
1929 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
1930 const auto &Dir = cast<OMPLoopDirective>(S);
1931 LValue LB =
1932 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
1933 auto LBCast = CGF.Builder.CreateIntCast(
1934 CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1935 CapturedVars.push_back(LBCast);
1936 LValue UB =
1937 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
1938
1939 auto UBCast = CGF.Builder.CreateIntCast(
1940 CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1941 CapturedVars.push_back(UBCast);
1942 }
1943
1944 static void
1945 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
1946 const OMPLoopDirective &S,
1947 CodeGenFunction::JumpDest LoopExit) {
1948 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
1949 PrePostActionTy &) {
1950 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
1951 emitDistributeParallelForInnerBounds,
1952 emitDistributeParallelForDispatchBounds);
1953 };
1954
1955 emitCommonOMPParallelDirective(
1956 CGF, S, OMPD_for, CGInlinedWorksharingLoop,
1957 emitDistributeParallelForDistributeInnerBoundParams);
1958 }
1959
1960 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
1961 const OMPDistributeParallelForDirective &S) {
1962 auto &&CodeGen = [&S](CodeGenFunction
&CGF, PrePostActionTy &) { 1963 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 1964 S.getDistInc()); 1965 }; 1966 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1967 OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for, 1968 /*HasCancel=*/false); 1969 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 1970 /*HasCancel=*/false); 1971 } 1972 1973 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( 1974 const OMPDistributeParallelForSimdDirective &S) { 1975 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1976 CGM.getOpenMPRuntime().emitInlinedDirective( 1977 *this, OMPD_distribute_parallel_for_simd, 1978 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1979 OMPLoopScope PreInitScope(CGF, S); 1980 CGF.EmitStmt( 1981 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1982 }); 1983 } 1984 1985 void CodeGenFunction::EmitOMPDistributeSimdDirective( 1986 const OMPDistributeSimdDirective &S) { 1987 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1988 CGM.getOpenMPRuntime().emitInlinedDirective( 1989 *this, OMPD_distribute_simd, 1990 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1991 OMPLoopScope PreInitScope(CGF, S); 1992 CGF.EmitStmt( 1993 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1994 }); 1995 } 1996 1997 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( 1998 const OMPTargetParallelForSimdDirective &S) { 1999 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2000 CGM.getOpenMPRuntime().emitInlinedDirective( 2001 *this, OMPD_target_parallel_for_simd, 2002 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2003 OMPLoopScope PreInitScope(CGF, S); 2004 CGF.EmitStmt( 2005 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2006 }); 2007 } 2008 2009 void CodeGenFunction::EmitOMPTargetSimdDirective( 2010 const OMPTargetSimdDirective &S) { 2011 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2012 CGM.getOpenMPRuntime().emitInlinedDirective( 2013 *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2014 OMPLoopScope PreInitScope(CGF, S); 2015 CGF.EmitStmt( 2016 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2017 }); 2018 } 2019 2020 void CodeGenFunction::EmitOMPTeamsDistributeDirective( 2021 const OMPTeamsDistributeDirective &S) { 2022 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2023 CGM.getOpenMPRuntime().emitInlinedDirective( 2024 *this, OMPD_teams_distribute, 2025 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2026 OMPLoopScope PreInitScope(CGF, S); 2027 CGF.EmitStmt( 2028 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2029 }); 2030 } 2031 2032 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( 2033 const OMPTeamsDistributeSimdDirective &S) { 2034 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2035 CGM.getOpenMPRuntime().emitInlinedDirective( 2036 *this, OMPD_teams_distribute_simd, 2037 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2038 OMPLoopScope PreInitScope(CGF, S); 2039 CGF.EmitStmt( 2040 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2041 }); 2042 } 2043 2044 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( 2045 const OMPTeamsDistributeParallelForSimdDirective &S) { 2046 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2047 CGM.getOpenMPRuntime().emitInlinedDirective( 2048 *this, OMPD_teams_distribute_parallel_for_simd, 2049 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2050 OMPLoopScope PreInitScope(CGF, S); 2051 CGF.EmitStmt( 2052 
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2053 }); 2054 } 2055 2056 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( 2057 const OMPTeamsDistributeParallelForDirective &S) { 2058 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2059 CGM.getOpenMPRuntime().emitInlinedDirective( 2060 *this, OMPD_teams_distribute_parallel_for, 2061 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2062 OMPLoopScope PreInitScope(CGF, S); 2063 CGF.EmitStmt( 2064 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2065 }); 2066 } 2067 2068 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( 2069 const OMPTargetTeamsDistributeDirective &S) { 2070 CGM.getOpenMPRuntime().emitInlinedDirective( 2071 *this, OMPD_target_teams_distribute, 2072 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2073 CGF.EmitStmt( 2074 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2075 }); 2076 } 2077 2078 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( 2079 const OMPTargetTeamsDistributeParallelForDirective &S) { 2080 CGM.getOpenMPRuntime().emitInlinedDirective( 2081 *this, OMPD_target_teams_distribute_parallel_for, 2082 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2083 CGF.EmitStmt( 2084 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2085 }); 2086 } 2087 2088 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( 2089 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 2090 CGM.getOpenMPRuntime().emitInlinedDirective( 2091 *this, OMPD_target_teams_distribute_parallel_for_simd, 2092 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2093 CGF.EmitStmt( 2094 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2095 }); 2096 } 2097 2098 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( 2099 const OMPTargetTeamsDistributeSimdDirective &S) { 2100 CGM.getOpenMPRuntime().emitInlinedDirective( 2101 *this, OMPD_target_teams_distribute_simd, 2102 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2103 CGF.EmitStmt( 2104 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2105 }); 2106 } 2107 2108 namespace { 2109 struct ScheduleKindModifiersTy { 2110 OpenMPScheduleClauseKind Kind; 2111 OpenMPScheduleClauseModifier M1; 2112 OpenMPScheduleClauseModifier M2; 2113 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, 2114 OpenMPScheduleClauseModifier M1, 2115 OpenMPScheduleClauseModifier M2) 2116 : Kind(Kind), M1(M1), M2(M2) {} 2117 }; 2118 } // namespace 2119 2120 bool CodeGenFunction::EmitOMPWorksharingLoop( 2121 const OMPLoopDirective &S, Expr *EUB, 2122 const CodeGenLoopBoundsTy &CodeGenLoopBounds, 2123 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 2124 // Emit the loop iteration variable. 2125 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2126 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2127 EmitVarDecl(*IVDecl); 2128 2129 // Emit the iterations count variable. 2130 // If it is not a variable, Sema decided to calculate iterations count on each 2131 // iteration (e.g., it is foldable into a constant). 2132 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2133 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2134 // Emit calculation of the iterations count. 2135 EmitIgnoredExpr(S.getCalcLastIteration()); 2136 } 2137 2138 auto &RT = CGM.getOpenMPRuntime(); 2139 2140 bool HasLastprivateClause; 2141 // Check pre-condition. 2142 { 2143 OMPLoopScope PreInitScope(*this, S); 2144 // Skip the entire loop if we don't meet the precondition. 
2145 // If the condition constant folds and can be elided, avoid emitting the 2146 // whole loop. 2147 bool CondConstant; 2148 llvm::BasicBlock *ContBlock = nullptr; 2149 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2150 if (!CondConstant) 2151 return false; 2152 } else { 2153 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2154 ContBlock = createBasicBlock("omp.precond.end"); 2155 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2156 getProfileCount(&S)); 2157 EmitBlock(ThenBlock); 2158 incrementProfileCounter(&S); 2159 } 2160 2161 bool Ordered = false; 2162 if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 2163 if (OrderedClause->getNumForLoops()) 2164 RT.emitDoacrossInit(*this, S); 2165 else 2166 Ordered = true; 2167 } 2168 2169 llvm::DenseSet<const Expr *> EmittedFinals; 2170 emitAlignedClause(*this, S); 2171 EmitOMPLinearClauseInit(S); 2172 // Emit helper vars inits. 2173 2174 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); 2175 LValue LB = Bounds.first; 2176 LValue UB = Bounds.second; 2177 LValue ST = 2178 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2179 LValue IL = 2180 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2181 2182 // Emit 'then' code. 2183 { 2184 OMPPrivateScope LoopScope(*this); 2185 if (EmitOMPFirstprivateClause(S, LoopScope)) { 2186 // Emit implicit barrier to synchronize threads and avoid data races on 2187 // initialization of firstprivate variables and post-update of 2188 // lastprivate variables. 2189 CGM.getOpenMPRuntime().emitBarrierCall( 2190 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2191 /*ForceSimpleCall=*/true); 2192 } 2193 EmitOMPPrivateClause(S, LoopScope); 2194 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2195 EmitOMPReductionClauseInit(S, LoopScope); 2196 EmitOMPPrivateLoopCounters(S, LoopScope); 2197 EmitOMPLinearClause(S, LoopScope); 2198 (void)LoopScope.Privatize(); 2199 2200 // Detect the loop schedule kind and chunk. 2201 llvm::Value *Chunk = nullptr; 2202 OpenMPScheduleTy ScheduleKind; 2203 if (auto *C = S.getSingleClause<OMPScheduleClause>()) { 2204 ScheduleKind.Schedule = C->getScheduleKind(); 2205 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2206 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2207 if (const auto *Ch = C->getChunkSize()) { 2208 Chunk = EmitScalarExpr(Ch); 2209 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2210 S.getIterationVariable()->getType(), 2211 S.getLocStart()); 2212 } 2213 } 2214 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2215 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2216 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2217 // If the static schedule kind is specified or if the ordered clause is 2218 // specified, and if no monotonic modifier is specified, the effect will 2219 // be as if the monotonic modifier was specified. 2220 if (RT.isStaticNonchunked(ScheduleKind.Schedule, 2221 /* Chunked */ Chunk != nullptr) && 2222 !Ordered) { 2223 if (isOpenMPSimdDirective(S.getDirectiveKind())) 2224 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2225 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2226 // When no chunk_size is specified, the iteration space is divided into 2227 // chunks that are approximately equal in size, and at most one chunk is 2228 // distributed to each thread. Note that the size of the chunks is 2229 // unspecified in this case. 
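// For instance (illustrative only): 128 iterations on 4 threads yield one
// chunk of 32 iterations per thread, so thread t executes iterations
// [t*32, t*32+31]. The actual partitioning is computed by the runtime in
// the static-init call below; here we only clamp UB and run the inner
// loop over the resulting [LB, UB] range.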
2230 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, 2231 IVSize, IVSigned, Ordered, 2232 IL.getAddress(), LB.getAddress(), 2233 UB.getAddress(), ST.getAddress()); 2234 auto LoopExit = 2235 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2236 // UB = min(UB, GlobalUB); 2237 EmitIgnoredExpr(S.getEnsureUpperBound()); 2238 // IV = LB; 2239 EmitIgnoredExpr(S.getInit()); 2240 // while (idx <= UB) { BODY; ++idx; } 2241 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 2242 S.getInc(), 2243 [&S, LoopExit](CodeGenFunction &CGF) { 2244 CGF.EmitOMPLoopBody(S, LoopExit); 2245 CGF.EmitStopPoint(&S); 2246 }, 2247 [](CodeGenFunction &) {}); 2248 EmitBlock(LoopExit.getBlock()); 2249 // Tell the runtime we are done. 2250 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2251 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 2252 }; 2253 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2254 } else { 2255 const bool IsMonotonic = 2256 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || 2257 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || 2258 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 2259 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 2260 // Emit the outer loop, which requests its work chunk [LB..UB] from 2261 // runtime and runs the inner loop to process it. 2262 const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), 2263 ST.getAddress(), IL.getAddress(), 2264 Chunk, EUB); 2265 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 2266 LoopArguments, CGDispatchBounds); 2267 } 2268 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2269 EmitOMPSimdFinal(S, 2270 [&](CodeGenFunction &CGF) -> llvm::Value * { 2271 return CGF.Builder.CreateIsNotNull( 2272 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2273 }); 2274 } 2275 EmitOMPReductionClauseFinal( 2276 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 2277 ? /*Parallel and Simd*/ OMPD_parallel_for_simd 2278 : /*Parallel only*/ OMPD_parallel); 2279 // Emit post-update of the reduction variables if IsLastIter != 0. 2280 emitPostUpdateForReductionClause( 2281 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2282 return CGF.Builder.CreateIsNotNull( 2283 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2284 }); 2285 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2286 if (HasLastprivateClause) 2287 EmitOMPLastprivateClauseFinal( 2288 S, isOpenMPSimdDirective(S.getDirectiveKind()), 2289 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 2290 } 2291 EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2292 return CGF.Builder.CreateIsNotNull( 2293 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2294 }); 2295 // We're now done with the loop, so jump to the continuation block. 2296 if (ContBlock) { 2297 EmitBranch(ContBlock); 2298 EmitBlock(ContBlock, true); 2299 } 2300 } 2301 return HasLastprivateClause; 2302 } 2303 2304 /// The following two functions generate expressions for the loop lower 2305 /// and upper bounds in case of static and dynamic (dispatch) schedule 2306 /// of the associated 'for' or 'distribute' loop. 
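/// As a sketch of the difference: emitForLoopBounds materializes the LB
/// and UB helper variables prepared by Sema (spanning the normalized
/// [0, LastIteration] space) for the static-init runtime call, while
/// emitDispatchForLoopBounds produces plain values, the constant 0 and
/// the evaluated LastIteration expression, for the dispatch-init call.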
2307 static std::pair<LValue, LValue> 2308 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 2309 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2310 LValue LB = 2311 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2312 LValue UB = 2313 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2314 return {LB, UB}; 2315 } 2316 2317 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not 2318 /// consider the lower and upper bound expressions generated by the 2319 /// worksharing loop support, but we use 0 and the iteration space size as 2320 /// constants 2321 static std::pair<llvm::Value *, llvm::Value *> 2322 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, 2323 Address LB, Address UB) { 2324 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2325 const Expr *IVExpr = LS.getIterationVariable(); 2326 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); 2327 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); 2328 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); 2329 return {LBVal, UBVal}; 2330 } 2331 2332 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 2333 bool HasLastprivates = false; 2334 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2335 PrePostActionTy &) { 2336 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 2337 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2338 emitForLoopBounds, 2339 emitDispatchForLoopBounds); 2340 }; 2341 { 2342 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2343 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 2344 S.hasCancel()); 2345 } 2346 2347 // Emit an implicit barrier at the end. 2348 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2349 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2350 } 2351 } 2352 2353 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 2354 bool HasLastprivates = false; 2355 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2356 PrePostActionTy &) { 2357 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2358 emitForLoopBounds, 2359 emitDispatchForLoopBounds); 2360 }; 2361 { 2362 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2363 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2364 } 2365 2366 // Emit an implicit barrier at the end. 2367 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2368 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2369 } 2370 } 2371 2372 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 2373 const Twine &Name, 2374 llvm::Value *Init = nullptr) { 2375 auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 2376 if (Init) 2377 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); 2378 return LVal; 2379 } 2380 2381 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 2382 auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); 2383 auto *CS = dyn_cast<CompoundStmt>(Stmt); 2384 bool HasLastprivates = false; 2385 auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF, 2386 PrePostActionTy &) { 2387 auto &C = CGF.CGM.getContext(); 2388 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2389 // Emit helper vars inits. 
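// The 'sections' worksharing loop below uses kmp_int32 (32-bit signed)
// bounds: LB = 0, UB = <number of sections> - 1, ST = 1, and IL as the
// is-last flag, mirroring a worksharing 'for' over the section indices.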
2390 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 2391 CGF.Builder.getInt32(0)); 2392 auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1) 2393 : CGF.Builder.getInt32(0); 2394 LValue UB = 2395 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 2396 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 2397 CGF.Builder.getInt32(1)); 2398 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 2399 CGF.Builder.getInt32(0)); 2400 // Loop counter. 2401 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 2402 OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2403 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 2404 OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2405 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 2406 // Generate condition for loop. 2407 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 2408 OK_Ordinary, S.getLocStart(), FPOptions()); 2409 // Increment for loop counter. 2410 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 2411 S.getLocStart()); 2412 auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) { 2413 // Iterate through all sections and emit a switch construct: 2414 // switch (IV) { 2415 // case 0: 2416 // <SectionStmt[0]>; 2417 // break; 2418 // ... 2419 // case <NumSection> - 1: 2420 // <SectionStmt[<NumSection> - 1]>; 2421 // break; 2422 // } 2423 // .omp.sections.exit: 2424 auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 2425 auto *SwitchStmt = CGF.Builder.CreateSwitch( 2426 CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, 2427 CS == nullptr ? 1 : CS->size()); 2428 if (CS) { 2429 unsigned CaseNumber = 0; 2430 for (auto *SubStmt : CS->children()) { 2431 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2432 CGF.EmitBlock(CaseBB); 2433 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 2434 CGF.EmitStmt(SubStmt); 2435 CGF.EmitBranch(ExitBB); 2436 ++CaseNumber; 2437 } 2438 } else { 2439 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2440 CGF.EmitBlock(CaseBB); 2441 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 2442 CGF.EmitStmt(Stmt); 2443 CGF.EmitBranch(ExitBB); 2444 } 2445 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 2446 }; 2447 2448 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2449 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 2450 // Emit implicit barrier to synchronize threads and avoid data races on 2451 // initialization of firstprivate variables and post-update of lastprivate 2452 // variables. 2453 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 2454 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2455 /*ForceSimpleCall=*/true); 2456 } 2457 CGF.EmitOMPPrivateClause(S, LoopScope); 2458 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2459 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2460 (void)LoopScope.Privatize(); 2461 2462 // Emit static non-chunked loop. 
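// Illustrative sketch of the overall lowering: a region such as
//   #pragma omp sections
//   {
//     #pragma omp section
//     foo();
//     #pragma omp section
//     bar();
//   }
// runs as a static non-chunked loop over IV in [LB, UB], whose body is
// the switch emitted in BodyGen above (case 0: foo(); case 1: bar();).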
2463 OpenMPScheduleTy ScheduleKind; 2464 ScheduleKind.Schedule = OMPC_SCHEDULE_static; 2465 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 2466 CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32, 2467 /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(), 2468 UB.getAddress(), ST.getAddress()); 2469 // UB = min(UB, GlobalUB); 2470 auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); 2471 auto *MinUBGlobalUB = CGF.Builder.CreateSelect( 2472 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 2473 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 2474 // IV = LB; 2475 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); 2476 // while (idx <= UB) { BODY; ++idx; } 2477 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, 2478 [](CodeGenFunction &) {}); 2479 // Tell the runtime we are done. 2480 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2481 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 2482 }; 2483 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); 2484 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 2485 // Emit post-update of the reduction variables if IsLastIter != 0. 2486 emitPostUpdateForReductionClause( 2487 CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2488 return CGF.Builder.CreateIsNotNull( 2489 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2490 }); 2491 2492 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2493 if (HasLastprivates) 2494 CGF.EmitOMPLastprivateClauseFinal( 2495 S, /*NoFinals=*/false, 2496 CGF.Builder.CreateIsNotNull( 2497 CGF.EmitLoadOfScalar(IL, S.getLocStart()))); 2498 }; 2499 2500 bool HasCancel = false; 2501 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 2502 HasCancel = OSD->hasCancel(); 2503 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 2504 HasCancel = OPSD->hasCancel(); 2505 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); 2506 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 2507 HasCancel); 2508 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 2509 // clause. Otherwise the barrier will be generated by the codegen for the 2510 // directive. 2511 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 2512 // Emit implicit barrier to synchronize threads and avoid data races on 2513 // initialization of firstprivate variables. 2514 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2515 OMPD_unknown); 2516 } 2517 } 2518 2519 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 2520 { 2521 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2522 EmitSections(S); 2523 } 2524 // Emit an implicit barrier at the end. 
2525 if (!S.getSingleClause<OMPNowaitClause>()) { 2526 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2527 OMPD_sections); 2528 } 2529 } 2530 2531 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 2532 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2533 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2534 }; 2535 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2536 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 2537 S.hasCancel()); 2538 } 2539 2540 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 2541 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 2542 llvm::SmallVector<const Expr *, 8> DestExprs; 2543 llvm::SmallVector<const Expr *, 8> SrcExprs; 2544 llvm::SmallVector<const Expr *, 8> AssignmentOps; 2545 // Check if there are any 'copyprivate' clauses associated with this 2546 // 'single' construct. 2547 // Build a list of copyprivate variables along with helper expressions 2548 // (<source>, <destination>, <destination>=<source> expressions) 2549 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 2550 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 2551 DestExprs.append(C->destination_exprs().begin(), 2552 C->destination_exprs().end()); 2553 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 2554 AssignmentOps.append(C->assignment_ops().begin(), 2555 C->assignment_ops().end()); 2556 } 2557 // Emit code for 'single' region along with 'copyprivate' clauses 2558 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2559 Action.Enter(CGF); 2560 OMPPrivateScope SingleScope(CGF); 2561 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 2562 CGF.EmitOMPPrivateClause(S, SingleScope); 2563 (void)SingleScope.Privatize(); 2564 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2565 }; 2566 { 2567 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2568 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 2569 CopyprivateVars, DestExprs, 2570 SrcExprs, AssignmentOps); 2571 } 2572 // Emit an implicit barrier at the end (to avoid data race on firstprivate 2573 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 2574 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 2575 CGM.getOpenMPRuntime().emitBarrierCall( 2576 *this, S.getLocStart(), 2577 S.getSingleClause<OMPNowaitClause>() ? 
OMPD_unknown : OMPD_single); 2578 } 2579 } 2580 2581 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 2582 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2583 Action.Enter(CGF); 2584 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2585 }; 2586 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2587 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 2588 } 2589 2590 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 2591 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2592 Action.Enter(CGF); 2593 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2594 }; 2595 Expr *Hint = nullptr; 2596 if (auto *HintClause = S.getSingleClause<OMPHintClause>()) 2597 Hint = HintClause->getHint(); 2598 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2599 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 2600 S.getDirectiveName().getAsString(), 2601 CodeGen, S.getLocStart(), Hint); 2602 } 2603 2604 void CodeGenFunction::EmitOMPParallelForDirective( 2605 const OMPParallelForDirective &S) { 2606 // Emit directive as a combined directive that consists of two implicit 2607 // directives: 'parallel' with 'for' directive. 2608 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2609 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); 2610 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2611 emitDispatchForLoopBounds); 2612 }; 2613 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, 2614 emitEmptyBoundParameters); 2615 } 2616 2617 void CodeGenFunction::EmitOMPParallelForSimdDirective( 2618 const OMPParallelForSimdDirective &S) { 2619 // Emit directive as a combined directive that consists of two implicit 2620 // directives: 'parallel' with 'for' directive. 2621 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2622 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2623 emitDispatchForLoopBounds); 2624 }; 2625 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, 2626 emitEmptyBoundParameters); 2627 } 2628 2629 void CodeGenFunction::EmitOMPParallelSectionsDirective( 2630 const OMPParallelSectionsDirective &S) { 2631 // Emit directive as a combined directive that consists of two implicit 2632 // directives: 'parallel' with 'sections' directive. 2633 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2634 CGF.EmitSections(S); 2635 }; 2636 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, 2637 emitEmptyBoundParameters); 2638 } 2639 2640 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, 2641 const RegionCodeGenTy &BodyGen, 2642 const TaskGenTy &TaskGen, 2643 OMPTaskDataTy &Data) { 2644 // Emit outlined function for task construct. 2645 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2646 auto *I = CS->getCapturedDecl()->param_begin(); 2647 auto *PartId = std::next(I); 2648 auto *TaskT = std::next(I, 4); 2649 // Check if the task is final 2650 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 2651 // If the condition constant folds and can be elided, try to avoid emitting 2652 // the condition and the dead arm of the if/else. 
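// For example, 'final(1)' folds to a constant and only the flag is
// recorded in Data.Final; a non-foldable condition is evaluated to an i1
// below and handed to the runtime at the task creation point.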
2653 auto *Cond = Clause->getCondition();
2654 bool CondConstant;
2655 if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2656 Data.Final.setInt(CondConstant);
2657 else
2658 Data.Final.setPointer(EvaluateExprAsBool(Cond));
2659 } else {
2660 // By default the task is not final.
2661 Data.Final.setInt(/*IntVal=*/false);
2662 }
2663 // Check if the task has a 'priority' clause.
2664 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2665 auto *Prio = Clause->getPriority();
2666 Data.Priority.setInt(/*IntVal=*/true);
2667 Data.Priority.setPointer(EmitScalarConversion(
2668 EmitScalarExpr(Prio), Prio->getType(),
2669 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2670 Prio->getExprLoc()));
2671 }
2672 // The first function argument for tasks is a thread id, the second one is a
2673 // part id (0 for tied tasks, >=0 for untied tasks).
2674 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2675 // Get list of private variables.
2676 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2677 auto IRef = C->varlist_begin();
2678 for (auto *IInit : C->private_copies()) {
2679 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2680 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2681 Data.PrivateVars.push_back(*IRef);
2682 Data.PrivateCopies.push_back(IInit);
2683 }
2684 ++IRef;
2685 }
2686 }
2687 EmittedAsPrivate.clear();
2688 // Get list of firstprivate variables.
2689 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2690 auto IRef = C->varlist_begin();
2691 auto IElemInitRef = C->inits().begin();
2692 for (auto *IInit : C->private_copies()) {
2693 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2694 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2695 Data.FirstprivateVars.push_back(*IRef);
2696 Data.FirstprivateCopies.push_back(IInit);
2697 Data.FirstprivateInits.push_back(*IElemInitRef);
2698 }
2699 ++IRef;
2700 ++IElemInitRef;
2701 }
2702 }
2703 // Get list of lastprivate variables (for taskloops).
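// LastprivateDstsOrigs maps each lastprivate destination helper back to
// the original variable reference, so that the copy-out emitted for the
// taskloop can be redirected to the privatized address (see its use in
// the task body CodeGen below).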
2704 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 2705 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 2706 auto IRef = C->varlist_begin(); 2707 auto ID = C->destination_exprs().begin(); 2708 for (auto *IInit : C->private_copies()) { 2709 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2710 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2711 Data.LastprivateVars.push_back(*IRef); 2712 Data.LastprivateCopies.push_back(IInit); 2713 } 2714 LastprivateDstsOrigs.insert( 2715 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 2716 cast<DeclRefExpr>(*IRef)}); 2717 ++IRef; 2718 ++ID; 2719 } 2720 } 2721 SmallVector<const Expr *, 4> LHSs; 2722 SmallVector<const Expr *, 4> RHSs; 2723 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 2724 auto IPriv = C->privates().begin(); 2725 auto IRed = C->reduction_ops().begin(); 2726 auto ILHS = C->lhs_exprs().begin(); 2727 auto IRHS = C->rhs_exprs().begin(); 2728 for (const auto *Ref : C->varlists()) { 2729 Data.ReductionVars.emplace_back(Ref); 2730 Data.ReductionCopies.emplace_back(*IPriv); 2731 Data.ReductionOps.emplace_back(*IRed); 2732 LHSs.emplace_back(*ILHS); 2733 RHSs.emplace_back(*IRHS); 2734 std::advance(IPriv, 1); 2735 std::advance(IRed, 1); 2736 std::advance(ILHS, 1); 2737 std::advance(IRHS, 1); 2738 } 2739 } 2740 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( 2741 *this, S.getLocStart(), LHSs, RHSs, Data); 2742 // Build list of dependences. 2743 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) 2744 for (auto *IRef : C->varlists()) 2745 Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); 2746 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs]( 2747 CodeGenFunction &CGF, PrePostActionTy &Action) { 2748 // Set proper addresses for generated private copies. 2749 OMPPrivateScope Scope(CGF); 2750 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || 2751 !Data.LastprivateVars.empty()) { 2752 auto *CopyFn = CGF.Builder.CreateLoad( 2753 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); 2754 auto *PrivatesPtr = CGF.Builder.CreateLoad( 2755 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); 2756 // Map privates. 
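// The protocol here: CopyFn (the task entry's privates-mapping helper,
// parameter 3) is called with the privates block (parameter 2) followed
// by one out-pointer per private/firstprivate/lastprivate variable; each
// out-pointer is then loaded to obtain the address of the privatized
// copy that the privatization scope should map to.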
2757 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2758 llvm::SmallVector<llvm::Value *, 16> CallArgs;
2759 CallArgs.push_back(PrivatesPtr);
2760 for (auto *E : Data.PrivateVars) {
2761 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2762 Address PrivatePtr = CGF.CreateMemTemp(
2763 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2764 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2765 CallArgs.push_back(PrivatePtr.getPointer());
2766 }
2767 for (auto *E : Data.FirstprivateVars) {
2768 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2769 Address PrivatePtr =
2770 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2771 ".firstpriv.ptr.addr");
2772 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2773 CallArgs.push_back(PrivatePtr.getPointer());
2774 }
2775 for (auto *E : Data.LastprivateVars) {
2776 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2777 Address PrivatePtr =
2778 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2779 ".lastpriv.ptr.addr");
2780 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2781 CallArgs.push_back(PrivatePtr.getPointer());
2782 }
2783 CGF.EmitRuntimeCall(CopyFn, CallArgs);
2784 for (auto &&Pair : LastprivateDstsOrigs) {
2785 auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2786 DeclRefExpr DRE(
2787 const_cast<VarDecl *>(OrigVD),
2788 /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2789 OrigVD) != nullptr,
2790 Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2791 Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2792 return CGF.EmitLValue(&DRE).getAddress();
2793 });
2794 }
2795 for (auto &&Pair : PrivatePtrs) {
2796 Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2797 CGF.getContext().getDeclAlign(Pair.first));
2798 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2799 }
2800 }
2801 if (Data.Reductions) {
2802 OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
2803 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2804 Data.ReductionOps);
2805 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2806 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2807 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2808 RedCG.emitSharedLValue(CGF, Cnt);
2809 RedCG.emitAggregateType(CGF, Cnt);
2810 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2811 CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2812 Replacement =
2813 Address(CGF.EmitScalarConversion(
2814 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2815 CGF.getContext().getPointerType(
2816 Data.ReductionCopies[Cnt]->getType()),
2817 SourceLocation()),
2818 Replacement.getAlignment());
2819 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2820 Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2821 [Replacement]() { return Replacement; });
2822 // FIXME: This must be removed once the runtime library is fixed.
2823 // Emit required threadprivate variables for
2824 // initializer/combiner/finalizer.
2825 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2826 RedCG, Cnt);
2827 }
2828 }
2829 // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const auto *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate
        // and privatized already during processing of the firstprivates.
        llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
            CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                SourceLocation()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
                              [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
                                                           RedCG, Cnt);
      }
    }
    (void)InRedScope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
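  // For illustration (hypothetical user code, not part of this file):
  //   #pragma omp task untied  // -> Data.Tied == false
  //   #pragma omp task         // -> Data.Tied == true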
2907 Data.Tied = !S.getSingleClause<OMPUntiedClause>(); 2908 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 2909 CGF.EmitStmt(CS->getCapturedStmt()); 2910 }; 2911 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 2912 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 2913 const OMPTaskDataTy &Data) { 2914 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn, 2915 SharedsTy, CapturedStruct, IfCond, 2916 Data); 2917 }; 2918 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 2919 } 2920 2921 void CodeGenFunction::EmitOMPTaskyieldDirective( 2922 const OMPTaskyieldDirective &S) { 2923 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); 2924 } 2925 2926 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 2927 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); 2928 } 2929 2930 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 2931 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart()); 2932 } 2933 2934 void CodeGenFunction::EmitOMPTaskgroupDirective( 2935 const OMPTaskgroupDirective &S) { 2936 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2937 Action.Enter(CGF); 2938 if (const Expr *E = S.getReductionRef()) { 2939 SmallVector<const Expr *, 4> LHSs; 2940 SmallVector<const Expr *, 4> RHSs; 2941 OMPTaskDataTy Data; 2942 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { 2943 auto IPriv = C->privates().begin(); 2944 auto IRed = C->reduction_ops().begin(); 2945 auto ILHS = C->lhs_exprs().begin(); 2946 auto IRHS = C->rhs_exprs().begin(); 2947 for (const auto *Ref : C->varlists()) { 2948 Data.ReductionVars.emplace_back(Ref); 2949 Data.ReductionCopies.emplace_back(*IPriv); 2950 Data.ReductionOps.emplace_back(*IRed); 2951 LHSs.emplace_back(*ILHS); 2952 RHSs.emplace_back(*IRHS); 2953 std::advance(IPriv, 1); 2954 std::advance(IRed, 1); 2955 std::advance(ILHS, 1); 2956 std::advance(IRHS, 1); 2957 } 2958 } 2959 llvm::Value *ReductionDesc = 2960 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(), 2961 LHSs, RHSs, Data); 2962 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2963 CGF.EmitVarDecl(*VD); 2964 CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD), 2965 /*Volatile=*/false, E->getType()); 2966 } 2967 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2968 }; 2969 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2970 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); 2971 } 2972 2973 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 2974 CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { 2975 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) { 2976 return llvm::makeArrayRef(FlushClause->varlist_begin(), 2977 FlushClause->varlist_end()); 2978 } 2979 return llvm::None; 2980 }(), S.getLocStart()); 2981 } 2982 2983 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, 2984 const CodeGenLoopTy &CodeGenLoop, 2985 Expr *IncExpr) { 2986 // Emit the loop iteration variable. 2987 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2988 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2989 EmitVarDecl(*IVDecl); 2990 2991 // Emit the iterations count variable. 2992 // If it is not a variable, Sema decided to calculate iterations count on each 2993 // iteration (e.g., it is foldable into a constant). 
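  // For example (a hypothetical loop, not from this file):
  //   for (int i = 0; i < n; ++i) ...
  // has a trip count that depends on 'n', so it is computed once into a
  // helper variable here; for a constant bound such as 'i < 16' Sema folds
  // the count and no variable is emitted.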
2994 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2995 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2996 // Emit calculation of the iterations count. 2997 EmitIgnoredExpr(S.getCalcLastIteration()); 2998 } 2999 3000 auto &RT = CGM.getOpenMPRuntime(); 3001 3002 bool HasLastprivateClause = false; 3003 // Check pre-condition. 3004 { 3005 OMPLoopScope PreInitScope(*this, S); 3006 // Skip the entire loop if we don't meet the precondition. 3007 // If the condition constant folds and can be elided, avoid emitting the 3008 // whole loop. 3009 bool CondConstant; 3010 llvm::BasicBlock *ContBlock = nullptr; 3011 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 3012 if (!CondConstant) 3013 return; 3014 } else { 3015 auto *ThenBlock = createBasicBlock("omp.precond.then"); 3016 ContBlock = createBasicBlock("omp.precond.end"); 3017 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 3018 getProfileCount(&S)); 3019 EmitBlock(ThenBlock); 3020 incrementProfileCounter(&S); 3021 } 3022 3023 // Emit 'then' code. 3024 { 3025 // Emit helper vars inits. 3026 3027 LValue LB = EmitOMPHelperVar( 3028 *this, cast<DeclRefExpr>( 3029 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3030 ? S.getCombinedLowerBoundVariable() 3031 : S.getLowerBoundVariable()))); 3032 LValue UB = EmitOMPHelperVar( 3033 *this, cast<DeclRefExpr>( 3034 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3035 ? S.getCombinedUpperBoundVariable() 3036 : S.getUpperBoundVariable()))); 3037 LValue ST = 3038 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 3039 LValue IL = 3040 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 3041 3042 OMPPrivateScope LoopScope(*this); 3043 if (EmitOMPFirstprivateClause(S, LoopScope)) { 3044 // Emit implicit barrier to synchronize threads and avoid data races on 3045 // initialization of firstprivate variables and post-update of 3046 // lastprivate variables. 3047 CGM.getOpenMPRuntime().emitBarrierCall( 3048 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 3049 /*ForceSimpleCall=*/true); 3050 } 3051 EmitOMPPrivateClause(S, LoopScope); 3052 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 3053 EmitOMPPrivateLoopCounters(S, LoopScope); 3054 (void)LoopScope.Privatize(); 3055 3056 // Detect the distribute schedule kind and chunk. 3057 llvm::Value *Chunk = nullptr; 3058 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; 3059 if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) { 3060 ScheduleKind = C->getDistScheduleKind(); 3061 if (const auto *Ch = C->getChunkSize()) { 3062 Chunk = EmitScalarExpr(Ch); 3063 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 3064 S.getIterationVariable()->getType(), 3065 S.getLocStart()); 3066 } 3067 } 3068 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 3069 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 3070 3071 // OpenMP [2.10.8, distribute Construct, Description] 3072 // If dist_schedule is specified, kind must be static. If specified, 3073 // iterations are divided into chunks of size chunk_size, chunks are 3074 // assigned to the teams of the league in a round-robin fashion in the 3075 // order of the team number. When no chunk_size is specified, the 3076 // iteration space is divided into chunks that are approximately equal 3077 // in size, and at most one chunk is distributed to each team of the 3078 // league. The size of the chunks is unspecified in this case. 
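      // For example (hypothetical user code):
      //   #pragma omp distribute dist_schedule(static, 16)
      // hands 16-iteration chunks to the teams in a round-robin fashion,
      // whereas
      //   #pragma omp distribute dist_schedule(static)
      // (or no dist_schedule clause at all) gives each team at most one
      // approximately equal-sized chunk and takes the branch below.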
3079 if (RT.isStaticNonchunked(ScheduleKind, 3080 /* Chunked */ Chunk != nullptr)) { 3081 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, 3082 IVSize, IVSigned, /* Ordered = */ false, 3083 IL.getAddress(), LB.getAddress(), 3084 UB.getAddress(), ST.getAddress()); 3085 auto LoopExit = 3086 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 3087 // UB = min(UB, GlobalUB); 3088 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3089 ? S.getCombinedEnsureUpperBound() 3090 : S.getEnsureUpperBound()); 3091 // IV = LB; 3092 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3093 ? S.getCombinedInit() 3094 : S.getInit()); 3095 3096 Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3097 ? S.getCombinedCond() 3098 : S.getCond(); 3099 3100 // for distribute alone, codegen 3101 // while (idx <= UB) { BODY; ++idx; } 3102 // when combined with 'for' (e.g. as in 'distribute parallel for') 3103 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 3104 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr, 3105 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 3106 CodeGenLoop(CGF, S, LoopExit); 3107 }, 3108 [](CodeGenFunction &) {}); 3109 EmitBlock(LoopExit.getBlock()); 3110 // Tell the runtime we are done. 3111 RT.emitForStaticFinish(*this, S.getLocStart()); 3112 } else { 3113 // Emit the outer loop, which requests its work chunk [LB..UB] from 3114 // runtime and runs the inner loop to process it. 3115 const OMPLoopArguments LoopArguments = { 3116 LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), 3117 Chunk}; 3118 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, 3119 CodeGenLoop); 3120 } 3121 3122 // Emit final copy of the lastprivate variables if IsLastIter != 0. 3123 if (HasLastprivateClause) 3124 EmitOMPLastprivateClauseFinal( 3125 S, /*NoFinals=*/false, 3126 Builder.CreateIsNotNull( 3127 EmitLoadOfScalar(IL, S.getLocStart()))); 3128 } 3129 3130 // We're now done with the loop, so jump to the continuation block. 
3131 if (ContBlock) { 3132 EmitBranch(ContBlock); 3133 EmitBlock(ContBlock, true); 3134 } 3135 } 3136 } 3137 3138 void CodeGenFunction::EmitOMPDistributeDirective( 3139 const OMPDistributeDirective &S) { 3140 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3141 3142 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 3143 }; 3144 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3145 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 3146 false); 3147 } 3148 3149 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 3150 const CapturedStmt *S) { 3151 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 3152 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 3153 CGF.CapturedStmtInfo = &CapStmtInfo; 3154 auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); 3155 Fn->addFnAttr(llvm::Attribute::NoInline); 3156 return Fn; 3157 } 3158 3159 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 3160 if (!S.getAssociatedStmt()) { 3161 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) 3162 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); 3163 return; 3164 } 3165 auto *C = S.getSingleClause<OMPSIMDClause>(); 3166 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, 3167 PrePostActionTy &Action) { 3168 if (C) { 3169 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 3170 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3171 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3172 auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); 3173 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, OutlinedFn, 3174 CapturedVars); 3175 } else { 3176 Action.Enter(CGF); 3177 CGF.EmitStmt( 3178 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3179 } 3180 }; 3181 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3182 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); 3183 } 3184 3185 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 3186 QualType SrcType, QualType DestType, 3187 SourceLocation Loc) { 3188 assert(CGF.hasScalarEvaluationKind(DestType) && 3189 "DestType must have scalar evaluation kind."); 3190 assert(!Val.isAggregate() && "Must be a scalar or complex."); 3191 return Val.isScalar() 3192 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, 3193 Loc) 3194 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 3195 DestType, Loc); 3196 } 3197 3198 static CodeGenFunction::ComplexPairTy 3199 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 3200 QualType DestType, SourceLocation Loc) { 3201 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 3202 "DestType must have complex evaluation kind."); 3203 CodeGenFunction::ComplexPairTy ComplexVal; 3204 if (Val.isScalar()) { 3205 // Convert the input element to the element type of the complex. 
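    // E.g. (illustrative): converting a scalar 'double' to '_Complex float'
    // yields { (float)Val, 0.0f }, the imaginary part being the null value
    // of the element type.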
3206 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 3207 auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 3208 DestElementType, Loc); 3209 ComplexVal = CodeGenFunction::ComplexPairTy( 3210 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 3211 } else { 3212 assert(Val.isComplex() && "Must be a scalar or complex."); 3213 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 3214 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 3215 ComplexVal.first = CGF.EmitScalarConversion( 3216 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 3217 ComplexVal.second = CGF.EmitScalarConversion( 3218 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 3219 } 3220 return ComplexVal; 3221 } 3222 3223 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 3224 LValue LVal, RValue RVal) { 3225 if (LVal.isGlobalReg()) { 3226 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 3227 } else { 3228 CGF.EmitAtomicStore(RVal, LVal, 3229 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3230 : llvm::AtomicOrdering::Monotonic, 3231 LVal.isVolatile(), /*IsInit=*/false); 3232 } 3233 } 3234 3235 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, 3236 QualType RValTy, SourceLocation Loc) { 3237 switch (getEvaluationKind(LVal.getType())) { 3238 case TEK_Scalar: 3239 EmitStoreThroughLValue(RValue::get(convertToScalarValue( 3240 *this, RVal, RValTy, LVal.getType(), Loc)), 3241 LVal); 3242 break; 3243 case TEK_Complex: 3244 EmitStoreOfComplex( 3245 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, 3246 /*isInit=*/false); 3247 break; 3248 case TEK_Aggregate: 3249 llvm_unreachable("Must be a scalar or complex."); 3250 } 3251 } 3252 3253 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 3254 const Expr *X, const Expr *V, 3255 SourceLocation Loc) { 3256 // v = x; 3257 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 3258 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 3259 LValue XLValue = CGF.EmitLValue(X); 3260 LValue VLValue = CGF.EmitLValue(V); 3261 RValue Res = XLValue.isGlobalReg() 3262 ? CGF.EmitLoadOfLValue(XLValue, Loc) 3263 : CGF.EmitAtomicLoad( 3264 XLValue, Loc, 3265 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3266 : llvm::AtomicOrdering::Monotonic, 3267 XLValue.isVolatile()); 3268 // OpenMP, 2.12.6, atomic Construct 3269 // Any atomic construct with a seq_cst clause forces the atomically 3270 // performed operation to include an implicit flush operation without a 3271 // list. 3272 if (IsSeqCst) 3273 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3274 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); 3275 } 3276 3277 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 3278 const Expr *X, const Expr *E, 3279 SourceLocation Loc) { 3280 // x = expr; 3281 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 3282 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); 3283 // OpenMP, 2.12.6, atomic Construct 3284 // Any atomic construct with a seq_cst clause forces the atomically 3285 // performed operation to include an implicit flush operation without a 3286 // list. 
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomic operations are supported for
  // the given type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval binop expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr;
    // x = expr Op x; -> expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide the new value, so evaluate it using the
        // old value of 'x'.
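        // E.g. for a hypothetical 'v = x += expr': 'atomicrmw' returned the
        // old 'x', so re-running the update expression over it recovers the
        // new value that 'v' must observe.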
3510 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3511 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 3512 NewVVal = CGF.EmitAnyExpr(UE); 3513 } 3514 } 3515 } else { 3516 // 'x' is simply rewritten with some 'expr'. 3517 NewVValType = X->getType().getNonReferenceType(); 3518 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 3519 X->getType().getNonReferenceType(), Loc); 3520 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue { 3521 NewVVal = XRValue; 3522 return ExprRValue; 3523 }; 3524 // Try to perform atomicrmw xchg, otherwise simple exchange. 3525 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3526 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 3527 Loc, Gen); 3528 if (Res.first) { 3529 // 'atomicrmw' instruction was generated. 3530 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 3531 } 3532 } 3533 // Emit post-update store to 'v' of old/new 'x' value. 3534 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); 3535 // OpenMP, 2.12.6, atomic Construct 3536 // Any atomic construct with a seq_cst clause forces the atomically 3537 // performed operation to include an implicit flush operation without a 3538 // list. 3539 if (IsSeqCst) 3540 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3541 } 3542 3543 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 3544 bool IsSeqCst, bool IsPostfixUpdate, 3545 const Expr *X, const Expr *V, const Expr *E, 3546 const Expr *UE, bool IsXLHSInRHSPart, 3547 SourceLocation Loc) { 3548 switch (Kind) { 3549 case OMPC_read: 3550 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 3551 break; 3552 case OMPC_write: 3553 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 3554 break; 3555 case OMPC_unknown: 3556 case OMPC_update: 3557 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 3558 break; 3559 case OMPC_capture: 3560 EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 3561 IsXLHSInRHSPart, Loc); 3562 break; 3563 case OMPC_if: 3564 case OMPC_final: 3565 case OMPC_num_threads: 3566 case OMPC_private: 3567 case OMPC_firstprivate: 3568 case OMPC_lastprivate: 3569 case OMPC_reduction: 3570 case OMPC_task_reduction: 3571 case OMPC_in_reduction: 3572 case OMPC_safelen: 3573 case OMPC_simdlen: 3574 case OMPC_collapse: 3575 case OMPC_default: 3576 case OMPC_seq_cst: 3577 case OMPC_shared: 3578 case OMPC_linear: 3579 case OMPC_aligned: 3580 case OMPC_copyin: 3581 case OMPC_copyprivate: 3582 case OMPC_flush: 3583 case OMPC_proc_bind: 3584 case OMPC_schedule: 3585 case OMPC_ordered: 3586 case OMPC_nowait: 3587 case OMPC_untied: 3588 case OMPC_threadprivate: 3589 case OMPC_depend: 3590 case OMPC_mergeable: 3591 case OMPC_device: 3592 case OMPC_threads: 3593 case OMPC_simd: 3594 case OMPC_map: 3595 case OMPC_num_teams: 3596 case OMPC_thread_limit: 3597 case OMPC_priority: 3598 case OMPC_grainsize: 3599 case OMPC_nogroup: 3600 case OMPC_num_tasks: 3601 case OMPC_hint: 3602 case OMPC_dist_schedule: 3603 case OMPC_defaultmap: 3604 case OMPC_uniform: 3605 case OMPC_to: 3606 case OMPC_from: 3607 case OMPC_use_device_ptr: 3608 case OMPC_is_device_ptr: 3609 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 3610 } 3611 } 3612 3613 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 3614 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); 3615 OpenMPClauseKind Kind = OMPC_unknown; 3616 for (auto *C : S.clauses()) { 3617 // Find first clause (skip seq_cst clause, if it is 
first). 3618 if (C->getClauseKind() != OMPC_seq_cst) { 3619 Kind = C->getClauseKind(); 3620 break; 3621 } 3622 } 3623 3624 const auto *CS = 3625 S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 3626 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) { 3627 enterFullExpression(EWC); 3628 } 3629 // Processing for statements under 'atomic capture'. 3630 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 3631 for (const auto *C : Compound->body()) { 3632 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) { 3633 enterFullExpression(EWC); 3634 } 3635 } 3636 } 3637 3638 auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, 3639 PrePostActionTy &) { 3640 CGF.EmitStopPoint(CS); 3641 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), 3642 S.getV(), S.getExpr(), S.getUpdateExpr(), 3643 S.isXLHSInRHSPart(), S.getLocStart()); 3644 }; 3645 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3646 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); 3647 } 3648 3649 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, 3650 const OMPExecutableDirective &S, 3651 const RegionCodeGenTy &CodeGen) { 3652 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); 3653 CodeGenModule &CGM = CGF.CGM; 3654 const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); 3655 3656 llvm::Function *Fn = nullptr; 3657 llvm::Constant *FnID = nullptr; 3658 3659 const Expr *IfCond = nullptr; 3660 // Check for the at most one if clause associated with the target region. 3661 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3662 if (C->getNameModifier() == OMPD_unknown || 3663 C->getNameModifier() == OMPD_target) { 3664 IfCond = C->getCondition(); 3665 break; 3666 } 3667 } 3668 3669 // Check if we have any device clause associated with the directive. 3670 const Expr *Device = nullptr; 3671 if (auto *C = S.getSingleClause<OMPDeviceClause>()) { 3672 Device = C->getDevice(); 3673 } 3674 3675 // Check if we have an if clause whose conditional always evaluates to false 3676 // or if we do not have any targets specified. If so the target region is not 3677 // an offload entry point. 3678 bool IsOffloadEntry = true; 3679 if (IfCond) { 3680 bool Val; 3681 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) 3682 IsOffloadEntry = false; 3683 } 3684 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3685 IsOffloadEntry = false; 3686 3687 assert(CGF.CurFuncDecl && "No parent declaration for target region!"); 3688 StringRef ParentName; 3689 // In case we have Ctors/Dtors we use the complete type variant to produce 3690 // the mangling of the device outlined kernel. 3691 if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) 3692 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); 3693 else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) 3694 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); 3695 else 3696 ParentName = 3697 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl))); 3698 3699 // Emit target region as a standalone region. 
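  // Sketch of the host-side result (assuming the libomptarget entry points;
  // the details are produced by emitTargetCall below):
  //   if (IfCond && __tgt_target(device_id, ...) == 0)
  //     ; // offloaded execution succeeded
  //   else
  //     outlined_host_fallback(captured vars);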
3700 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, 3701 IsOffloadEntry, CodeGen); 3702 OMPLexicalScope Scope(CGF, S); 3703 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3704 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 3705 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, 3706 CapturedVars); 3707 } 3708 3709 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, 3710 PrePostActionTy &Action) { 3711 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 3712 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3713 CGF.EmitOMPPrivateClause(S, PrivateScope); 3714 (void)PrivateScope.Privatize(); 3715 3716 Action.Enter(CGF); 3717 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3718 } 3719 3720 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, 3721 StringRef ParentName, 3722 const OMPTargetDirective &S) { 3723 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3724 emitTargetRegion(CGF, S, Action); 3725 }; 3726 llvm::Function *Fn; 3727 llvm::Constant *Addr; 3728 // Emit target region as a standalone region. 3729 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3730 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3731 assert(Fn && Addr && "Target device function emission failed."); 3732 } 3733 3734 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 3735 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3736 emitTargetRegion(CGF, S, Action); 3737 }; 3738 emitCommonOMPTargetDirective(*this, S, CodeGen); 3739 } 3740 3741 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, 3742 const OMPExecutableDirective &S, 3743 OpenMPDirectiveKind InnermostKind, 3744 const RegionCodeGenTy &CodeGen) { 3745 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); 3746 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( 3747 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 3748 3749 const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>(); 3750 const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>(); 3751 if (NT || TL) { 3752 Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; 3753 Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; 3754 3755 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, 3756 S.getLocStart()); 3757 } 3758 3759 OMPTeamsScope Scope(CGF, S); 3760 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3761 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3762 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, 3763 CapturedVars); 3764 } 3765 3766 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { 3767 // Emit teams region as a standalone region. 
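  // For illustration (hypothetical user code):
  //   #pragma omp teams num_teams(4) thread_limit(64) firstprivate(a)
  // emitCommonOMPTeamsDirective forwards the clause expressions to the
  // runtime; the lambda below privatizes the captured variables and emits
  // the region body once per team.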
3768 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3769 OMPPrivateScope PrivateScope(CGF); 3770 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3771 CGF.EmitOMPPrivateClause(S, PrivateScope); 3772 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 3773 (void)PrivateScope.Privatize(); 3774 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3775 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 3776 }; 3777 emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); 3778 emitPostUpdateForReductionClause( 3779 *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 3780 } 3781 3782 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, 3783 const OMPTargetTeamsDirective &S) { 3784 auto *CS = S.getCapturedStmt(OMPD_teams); 3785 Action.Enter(CGF); 3786 auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 3787 // TODO: Add support for clauses. 3788 CGF.EmitStmt(CS->getCapturedStmt()); 3789 }; 3790 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); 3791 } 3792 3793 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 3794 CodeGenModule &CGM, StringRef ParentName, 3795 const OMPTargetTeamsDirective &S) { 3796 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3797 emitTargetTeamsRegion(CGF, Action, S); 3798 }; 3799 llvm::Function *Fn; 3800 llvm::Constant *Addr; 3801 // Emit target region as a standalone region. 3802 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3803 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3804 assert(Fn && Addr && "Target device function emission failed."); 3805 } 3806 3807 void CodeGenFunction::EmitOMPTargetTeamsDirective( 3808 const OMPTargetTeamsDirective &S) { 3809 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3810 emitTargetTeamsRegion(CGF, Action, S); 3811 }; 3812 emitCommonOMPTargetDirective(*this, S, CodeGen); 3813 } 3814 3815 void CodeGenFunction::EmitOMPCancellationPointDirective( 3816 const OMPCancellationPointDirective &S) { 3817 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), 3818 S.getCancelRegion()); 3819 } 3820 3821 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 3822 const Expr *IfCond = nullptr; 3823 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3824 if (C->getNameModifier() == OMPD_unknown || 3825 C->getNameModifier() == OMPD_cancel) { 3826 IfCond = C->getCondition(); 3827 break; 3828 } 3829 } 3830 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, 3831 S.getCancelRegion()); 3832 } 3833 3834 CodeGenFunction::JumpDest 3835 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 3836 if (Kind == OMPD_parallel || Kind == OMPD_task || 3837 Kind == OMPD_target_parallel) 3838 return ReturnBlock; 3839 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 3840 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || 3841 Kind == OMPD_distribute_parallel_for || 3842 Kind == OMPD_target_parallel_for); 3843 return OMPCancelStack.getExitBlock(); 3844 } 3845 3846 void CodeGenFunction::EmitOMPUseDevicePtrClause( 3847 const OMPClause &NC, OMPPrivateScope &PrivateScope, 3848 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { 3849 const auto &C = cast<OMPUseDevicePtrClause>(NC); 3850 auto OrigVarIt = C.varlist_begin(); 3851 auto InitIt = C.inits().begin(); 3852 for (auto PvtVarIt : C.private_copies()) { 3853 auto *OrigVD = 
        cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // an OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it will be initialized by the
      // declaration we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable served its purpose in the emission of the
      // previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}

// Generate the instructions for the '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code
  // generation to deactivate privatization.
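  // For illustration (hypothetical user code):
  //   #pragma omp target data map(to : buf[0:n]) use_device_ptr(buf)
  //   { do_something_on_device(buf); }  // 'buf' holds the device address
  // The action below flips PrivatizeDevicePointers so that the region body
  // sees the runtime-provided device pointer instead of the host pointer.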
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers
      // if needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else
        RCG(CGF);
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inlined scope, because changes to references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  auto *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(
      CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  // TODO: codegen for target parallel for.
}

/// Map an OpenMP loop helper variable (lower/upper bound, stride, or the
/// last-iteration flag) to the corresponding parameter of the captured region.
4088 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, 4089 const ImplicitParamDecl *PVD, 4090 CodeGenFunction::OMPPrivateScope &Privates) { 4091 auto *VDecl = cast<VarDecl>(Helper->getDecl()); 4092 Privates.addPrivate( 4093 VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); }); 4094 } 4095 4096 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { 4097 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); 4098 // Emit outlined function for task construct. 4099 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 4100 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 4101 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 4102 const Expr *IfCond = nullptr; 4103 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 4104 if (C->getNameModifier() == OMPD_unknown || 4105 C->getNameModifier() == OMPD_taskloop) { 4106 IfCond = C->getCondition(); 4107 break; 4108 } 4109 } 4110 4111 OMPTaskDataTy Data; 4112 // Check if taskloop must be emitted without taskgroup. 4113 Data.Nogroup = S.getSingleClause<OMPNogroupClause>(); 4114 // TODO: Check if we should emit tied or untied task. 4115 Data.Tied = true; 4116 // Set scheduling for taskloop 4117 if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) { 4118 // grainsize clause 4119 Data.Schedule.setInt(/*IntVal=*/false); 4120 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize())); 4121 } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) { 4122 // num_tasks clause 4123 Data.Schedule.setInt(/*IntVal=*/true); 4124 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks())); 4125 } 4126 4127 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { 4128 // if (PreCond) { 4129 // for (IV in 0..LastIteration) BODY; 4130 // <Final counter/linear vars updates>; 4131 // } 4132 // 4133 4134 // Emit: if (PreCond) - begin. 4135 // If the condition constant folds and can be elided, avoid emitting the 4136 // whole loop. 4137 bool CondConstant; 4138 llvm::BasicBlock *ContBlock = nullptr; 4139 OMPLoopScope PreInitScope(CGF, S); 4140 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 4141 if (!CondConstant) 4142 return; 4143 } else { 4144 auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); 4145 ContBlock = CGF.createBasicBlock("taskloop.if.end"); 4146 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 4147 CGF.getProfileCount(&S)); 4148 CGF.EmitBlock(ThenBlock); 4149 CGF.incrementProfileCounter(&S); 4150 } 4151 4152 if (isOpenMPSimdDirective(S.getDirectiveKind())) 4153 CGF.EmitOMPSimdInit(S); 4154 4155 OMPPrivateScope LoopScope(CGF); 4156 // Emit helper vars inits. 4157 enum { LowerBound = 5, UpperBound, Stride, LastIter }; 4158 auto *I = CS->getCapturedDecl()->param_begin(); 4159 auto *LBP = std::next(I, LowerBound); 4160 auto *UBP = std::next(I, UpperBound); 4161 auto *STP = std::next(I, Stride); 4162 auto *LIP = std::next(I, LastIter); 4163 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP, 4164 LoopScope); 4165 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP, 4166 LoopScope); 4167 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope); 4168 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, 4169 LoopScope); 4170 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 4171 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 4172 (void)LoopScope.Privatize(); 4173 // Emit the loop iteration variable. 
4174 const Expr *IVExpr = S.getIterationVariable(); 4175 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 4176 CGF.EmitVarDecl(*IVDecl); 4177 CGF.EmitIgnoredExpr(S.getInit()); 4178 4179 // Emit the iterations count variable. 4180 // If it is not a variable, Sema decided to calculate iterations count on 4181 // each iteration (e.g., it is foldable into a constant). 4182 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 4183 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 4184 // Emit calculation of the iterations count. 4185 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 4186 } 4187 4188 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 4189 S.getInc(), 4190 [&S](CodeGenFunction &CGF) { 4191 CGF.EmitOMPLoopBody(S, JumpDest()); 4192 CGF.EmitStopPoint(&S); 4193 }, 4194 [](CodeGenFunction &) {}); 4195 // Emit: if (PreCond) - end. 4196 if (ContBlock) { 4197 CGF.EmitBranch(ContBlock); 4198 CGF.EmitBlock(ContBlock, true); 4199 } 4200 // Emit final copy of the lastprivate variables if IsLastIter != 0. 4201 if (HasLastprivateClause) { 4202 CGF.EmitOMPLastprivateClauseFinal( 4203 S, isOpenMPSimdDirective(S.getDirectiveKind()), 4204 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( 4205 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 4206 (*LIP)->getType(), S.getLocStart()))); 4207 } 4208 }; 4209 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 4210 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 4211 const OMPTaskDataTy &Data) { 4212 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) { 4213 OMPLoopScope PreInitScope(CGF, S); 4214 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S, 4215 OutlinedFn, SharedsTy, 4216 CapturedStruct, IfCond, Data); 4217 }; 4218 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, 4219 CodeGen); 4220 }; 4221 if (Data.Nogroup) 4222 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 4223 else { 4224 CGM.getOpenMPRuntime().emitTaskgroupRegion( 4225 *this, 4226 [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, 4227 PrePostActionTy &Action) { 4228 Action.Enter(CGF); 4229 CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 4230 }, 4231 S.getLocStart()); 4232 } 4233 } 4234 4235 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 4236 EmitOMPTaskLoopBasedDirective(S); 4237 } 4238 4239 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 4240 const OMPTaskLoopSimdDirective &S) { 4241 EmitOMPTaskLoopBasedDirective(S); 4242 } 4243 4244 // Generate the instructions for '#pragma omp target update' directive. 4245 void CodeGenFunction::EmitOMPTargetUpdateDirective( 4246 const OMPTargetUpdateDirective &S) { 4247 // If we don't have target devices, don't bother emitting the data mapping 4248 // code. 4249 if (CGM.getLangOpts().OMPTargetTriples.empty()) 4250 return; 4251 4252 // Check if we have any if clause associated with the directive. 4253 const Expr *IfCond = nullptr; 4254 if (auto *C = S.getSingleClause<OMPIfClause>()) 4255 IfCond = C->getCondition(); 4256 4257 // Check if we have any device clause associated with the directive. 4258 const Expr *Device = nullptr; 4259 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 4260 Device = C->getDevice(); 4261 4262 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 4263 } 4264