1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit OpenMP nodes as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCleanup.h" 15 #include "CGOpenMPRuntime.h" 16 #include "CodeGenFunction.h" 17 #include "CodeGenModule.h" 18 #include "TargetInfo.h" 19 #include "clang/AST/Stmt.h" 20 #include "clang/AST/StmtOpenMP.h" 21 #include "clang/AST/DeclOpenMP.h" 22 #include "llvm/IR/CallSite.h" 23 using namespace clang; 24 using namespace CodeGen; 25 26 namespace { 27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen 28 /// for captured expressions. 29 class OMPLexicalScope : public CodeGenFunction::LexicalScope { 30 void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 31 for (const auto *C : S.clauses()) { 32 if (auto *CPI = OMPClauseWithPreInit::get(C)) { 33 if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { 34 for (const auto *I : PreInit->decls()) { 35 if (!I->hasAttr<OMPCaptureNoInitAttr>()) 36 CGF.EmitVarDecl(cast<VarDecl>(*I)); 37 else { 38 CodeGenFunction::AutoVarEmission Emission = 39 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 40 CGF.EmitAutoVarCleanups(Emission); 41 } 42 } 43 } 44 } 45 } 46 } 47 CodeGenFunction::OMPPrivateScope InlinedShareds; 48 49 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { 50 return CGF.LambdaCaptureFields.lookup(VD) || 51 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || 52 (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl)); 53 } 54 55 public: 56 OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S, 57 bool AsInlined = false, bool EmitPreInitStmt = true) 58 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), 59 InlinedShareds(CGF) { 60 if (EmitPreInitStmt) 61 emitPreInitStmt(CGF, S); 62 if (AsInlined) { 63 if (S.hasAssociatedStmt()) { 64 auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); 65 for (auto &C : CS->captures()) { 66 if (C.capturesVariable() || C.capturesVariableByCopy()) { 67 auto *VD = C.getCapturedVar(); 68 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 69 isCapturedVar(CGF, VD) || 70 (CGF.CapturedStmtInfo && 71 InlinedShareds.isGlobalVarCaptured(VD)), 72 VD->getType().getNonReferenceType(), VK_LValue, 73 SourceLocation()); 74 InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { 75 return CGF.EmitLValue(&DRE).getAddress(); 76 }); 77 } 78 } 79 (void)InlinedShareds.Privatize(); 80 } 81 } 82 } 83 }; 84 85 /// Lexical scope for OpenMP parallel construct, that handles correct codegen 86 /// for captured expressions. 87 class OMPParallelScope final : public OMPLexicalScope { 88 bool EmitPreInitStmt(const OMPExecutableDirective &S) { 89 OpenMPDirectiveKind Kind = S.getDirectiveKind(); 90 return !(isOpenMPTargetExecutionDirective(Kind) || 91 isOpenMPLoopBoundSharingDirective(Kind)) && 92 isOpenMPParallelDirective(Kind); 93 } 94 95 public: 96 OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) 97 : OMPLexicalScope(CGF, S, 98 /*AsInlined=*/false, 99 /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {} 100 }; 101 102 /// Lexical scope for OpenMP teams construct, that handles correct codegen 103 /// for captured expressions. 104 class OMPTeamsScope final : public OMPLexicalScope { 105 bool EmitPreInitStmt(const OMPExecutableDirective &S) { 106 OpenMPDirectiveKind Kind = S.getDirectiveKind(); 107 return !isOpenMPTargetExecutionDirective(Kind) && 108 isOpenMPTeamsDirective(Kind); 109 } 110 111 public: 112 OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) 113 : OMPLexicalScope(CGF, S, 114 /*AsInlined=*/false, 115 /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {} 116 }; 117 118 /// Private scope for OpenMP loop-based directives, that supports capturing 119 /// of used expression from loop statement. 120 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { 121 void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { 122 if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) { 123 if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) { 124 for (const auto *I : PreInits->decls()) 125 CGF.EmitVarDecl(cast<VarDecl>(*I)); 126 } 127 } 128 } 129 130 public: 131 OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S) 132 : CodeGenFunction::RunCleanupsScope(CGF) { 133 emitPreInitStmt(CGF, S); 134 } 135 }; 136 137 } // namespace 138 139 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { 140 auto &C = getContext(); 141 llvm::Value *Size = nullptr; 142 auto SizeInChars = C.getTypeSizeInChars(Ty); 143 if (SizeInChars.isZero()) { 144 // getTypeSizeInChars() returns 0 for a VLA. 145 while (auto *VAT = C.getAsVariableArrayType(Ty)) { 146 llvm::Value *ArraySize; 147 std::tie(ArraySize, Ty) = getVLASize(VAT); 148 Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize; 149 } 150 SizeInChars = C.getTypeSizeInChars(Ty); 151 if (SizeInChars.isZero()) 152 return llvm::ConstantInt::get(SizeTy, /*V=*/0); 153 Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); 154 } else 155 Size = CGM.getSize(SizeInChars); 156 return Size; 157 } 158 159 void CodeGenFunction::GenerateOpenMPCapturedVars( 160 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { 161 const RecordDecl *RD = S.getCapturedRecordDecl(); 162 auto CurField = RD->field_begin(); 163 auto CurCap = S.captures().begin(); 164 for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), 165 E = S.capture_init_end(); 166 I != E; ++I, ++CurField, ++CurCap) { 167 if (CurField->hasCapturedVLAType()) { 168 auto VAT = CurField->getCapturedVLAType(); 169 auto *Val = VLASizeMap[VAT->getSizeExpr()]; 170 CapturedVars.push_back(Val); 171 } else if (CurCap->capturesThis()) 172 CapturedVars.push_back(CXXThisValue); 173 else if (CurCap->capturesVariableByCopy()) { 174 llvm::Value *CV = 175 EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal(); 176 177 // If the field is not a pointer, we need to save the actual value 178 // and load it as a void pointer. 179 if (!CurField->getType()->isAnyPointerType()) { 180 auto &Ctx = getContext(); 181 auto DstAddr = CreateMemTemp( 182 Ctx.getUIntPtrType(), 183 Twine(CurCap->getCapturedVar()->getName()) + ".casted"); 184 LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); 185 186 auto *SrcAddrVal = EmitScalarConversion( 187 DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), 188 Ctx.getPointerType(CurField->getType()), SourceLocation()); 189 LValue SrcLV = 190 MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType()); 191 192 // Store the value using the source type pointer. 193 EmitStoreThroughLValue(RValue::get(CV), SrcLV); 194 195 // Load the value using the destination type pointer. 196 CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal(); 197 } 198 CapturedVars.push_back(CV); 199 } else { 200 assert(CurCap->capturesVariable() && "Expected capture by reference."); 201 CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer()); 202 } 203 } 204 } 205 206 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType, 207 StringRef Name, LValue AddrLV, 208 bool isReferenceType = false) { 209 ASTContext &Ctx = CGF.getContext(); 210 211 auto *CastedPtr = CGF.EmitScalarConversion( 212 AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(), 213 Ctx.getPointerType(DstType), SourceLocation()); 214 auto TmpAddr = 215 CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) 216 .getAddress(); 217 218 // If we are dealing with references we need to return the address of the 219 // reference instead of the reference of the value. 220 if (isReferenceType) { 221 QualType RefType = Ctx.getLValueReferenceType(DstType); 222 auto *RefVal = TmpAddr.getPointer(); 223 TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref"); 224 auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType); 225 CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true); 226 } 227 228 return TmpAddr; 229 } 230 231 static QualType getCanonicalParamType(ASTContext &C, QualType T) { 232 if (T->isLValueReferenceType()) { 233 return C.getLValueReferenceType( 234 getCanonicalParamType(C, T.getNonReferenceType()), 235 /*SpelledAsLValue=*/false); 236 } 237 if (T->isPointerType()) 238 return C.getPointerType(getCanonicalParamType(C, T->getPointeeType())); 239 return C.getCanonicalParamType(T); 240 } 241 242 namespace { 243 /// Contains required data for proper outlined function codegen. 244 struct FunctionOptions { 245 /// Captured statement for which the function is generated. 246 const CapturedStmt *S = nullptr; 247 /// true if cast to/from UIntPtr is required for variables captured by 248 /// value. 249 const bool UIntPtrCastRequired = true; 250 /// true if only casted arguments must be registered as local args or VLA 251 /// sizes. 252 const bool RegisterCastedArgsOnly = false; 253 /// Name of the generated function. 254 const StringRef FunctionName; 255 explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, 256 bool RegisterCastedArgsOnly, 257 StringRef FunctionName) 258 : S(S), UIntPtrCastRequired(UIntPtrCastRequired), 259 RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), 260 FunctionName(FunctionName) {} 261 }; 262 } 263 264 static llvm::Function *emitOutlinedFunctionPrologue( 265 CodeGenFunction &CGF, FunctionArgList &Args, 266 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> 267 &LocalAddrs, 268 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> 269 &VLASizes, 270 llvm::Value *&CXXThisValue, const FunctionOptions &FO) { 271 const CapturedDecl *CD = FO.S->getCapturedDecl(); 272 const RecordDecl *RD = FO.S->getCapturedRecordDecl(); 273 assert(CD->hasBody() && "missing CapturedDecl body"); 274 275 CXXThisValue = nullptr; 276 // Build the argument list. 277 CodeGenModule &CGM = CGF.CGM; 278 ASTContext &Ctx = CGM.getContext(); 279 FunctionArgList TargetArgs; 280 Args.append(CD->param_begin(), 281 std::next(CD->param_begin(), CD->getContextParamPosition())); 282 TargetArgs.append( 283 CD->param_begin(), 284 std::next(CD->param_begin(), CD->getContextParamPosition())); 285 auto I = FO.S->captures().begin(); 286 for (auto *FD : RD->fields()) { 287 QualType ArgType = FD->getType(); 288 IdentifierInfo *II = nullptr; 289 VarDecl *CapVar = nullptr; 290 291 // If this is a capture by copy and the type is not a pointer, the outlined 292 // function argument type should be uintptr and the value properly casted to 293 // uintptr. This is necessary given that the runtime library is only able to 294 // deal with pointers. We can pass in the same way the VLA type sizes to the 295 // outlined function. 296 if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || 297 I->capturesVariableArrayType()) { 298 if (FO.UIntPtrCastRequired) 299 ArgType = Ctx.getUIntPtrType(); 300 } 301 302 if (I->capturesVariable() || I->capturesVariableByCopy()) { 303 CapVar = I->getCapturedVar(); 304 II = CapVar->getIdentifier(); 305 } else if (I->capturesThis()) 306 II = &Ctx.Idents.get("this"); 307 else { 308 assert(I->capturesVariableArrayType()); 309 II = &Ctx.Idents.get("vla"); 310 } 311 if (ArgType->isVariablyModifiedType()) 312 ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType()); 313 auto *Arg = 314 ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II, 315 ArgType, ImplicitParamDecl::Other); 316 Args.emplace_back(Arg); 317 // Do not cast arguments if we emit function with non-original types. 318 TargetArgs.emplace_back( 319 FO.UIntPtrCastRequired 320 ? Arg 321 : CGM.getOpenMPRuntime().translateParameter(FD, Arg)); 322 ++I; 323 } 324 Args.append( 325 std::next(CD->param_begin(), CD->getContextParamPosition() + 1), 326 CD->param_end()); 327 TargetArgs.append( 328 std::next(CD->param_begin(), CD->getContextParamPosition() + 1), 329 CD->param_end()); 330 331 // Create the function declaration. 332 FunctionType::ExtInfo ExtInfo; 333 const CGFunctionInfo &FuncInfo = 334 CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs); 335 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); 336 337 llvm::Function *F = 338 llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage, 339 FO.FunctionName, &CGM.getModule()); 340 CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); 341 if (CD->isNothrow()) 342 F->setDoesNotThrow(); 343 344 // Generate the function. 345 CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, 346 FO.S->getLocStart(), CD->getBody()->getLocStart()); 347 unsigned Cnt = CD->getContextParamPosition(); 348 I = FO.S->captures().begin(); 349 for (auto *FD : RD->fields()) { 350 // Do not map arguments if we emit function with non-original types. 351 Address LocalAddr(Address::invalid()); 352 if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) { 353 LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt], 354 TargetArgs[Cnt]); 355 } else { 356 LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]); 357 } 358 // If we are capturing a pointer by copy we don't need to do anything, just 359 // use the value that we get from the arguments. 360 if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { 361 const VarDecl *CurVD = I->getCapturedVar(); 362 // If the variable is a reference we need to materialize it here. 363 if (CurVD->getType()->isReferenceType()) { 364 Address RefAddr = CGF.CreateMemTemp( 365 CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref"); 366 CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, 367 /*Volatile=*/false, CurVD->getType()); 368 LocalAddr = RefAddr; 369 } 370 if (!FO.RegisterCastedArgsOnly) 371 LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}}); 372 ++Cnt; 373 ++I; 374 continue; 375 } 376 377 LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); 378 LValue ArgLVal = 379 CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), BaseInfo); 380 if (FD->hasCapturedVLAType()) { 381 if (FO.UIntPtrCastRequired) { 382 ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(), 383 Args[Cnt]->getName(), 384 ArgLVal), 385 FD->getType(), BaseInfo); 386 } 387 auto *ExprArg = 388 CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal(); 389 auto VAT = FD->getCapturedVLAType(); 390 VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}}); 391 } else if (I->capturesVariable()) { 392 auto *Var = I->getCapturedVar(); 393 QualType VarTy = Var->getType(); 394 Address ArgAddr = ArgLVal.getAddress(); 395 if (!VarTy->isReferenceType()) { 396 if (ArgLVal.getType()->isLValueReferenceType()) { 397 ArgAddr = CGF.EmitLoadOfReference( 398 ArgAddr, ArgLVal.getType()->castAs<ReferenceType>()); 399 } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { 400 assert(ArgLVal.getType()->isPointerType()); 401 ArgAddr = CGF.EmitLoadOfPointer( 402 ArgAddr, ArgLVal.getType()->castAs<PointerType>()); 403 } 404 } 405 if (!FO.RegisterCastedArgsOnly) { 406 LocalAddrs.insert( 407 {Args[Cnt], 408 {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}}); 409 } 410 } else if (I->capturesVariableByCopy()) { 411 assert(!FD->getType()->isAnyPointerType() && 412 "Not expecting a captured pointer."); 413 auto *Var = I->getCapturedVar(); 414 QualType VarTy = Var->getType(); 415 LocalAddrs.insert( 416 {Args[Cnt], 417 {Var, 418 FO.UIntPtrCastRequired 419 ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(), 420 ArgLVal, VarTy->isReferenceType()) 421 : ArgLVal.getAddress()}}); 422 } else { 423 // If 'this' is captured, load it into CXXThisValue. 424 assert(I->capturesThis()); 425 CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()) 426 .getScalarVal(); 427 LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}}); 428 } 429 ++Cnt; 430 ++I; 431 } 432 433 return F; 434 } 435 436 llvm::Function * 437 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { 438 assert( 439 CapturedStmtInfo && 440 "CapturedStmtInfo should be set when generating the captured function"); 441 const CapturedDecl *CD = S.getCapturedDecl(); 442 // Build the argument list. 443 bool NeedWrapperFunction = 444 getDebugInfo() && 445 CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo; 446 FunctionArgList Args; 447 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; 448 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes; 449 SmallString<256> Buffer; 450 llvm::raw_svector_ostream Out(Buffer); 451 Out << CapturedStmtInfo->getHelperName(); 452 if (NeedWrapperFunction) 453 Out << "_debug__"; 454 FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, 455 Out.str()); 456 llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs, 457 VLASizes, CXXThisValue, FO); 458 for (const auto &LocalAddrPair : LocalAddrs) { 459 if (LocalAddrPair.second.first) { 460 setAddrOfLocalVar(LocalAddrPair.second.first, 461 LocalAddrPair.second.second); 462 } 463 } 464 for (const auto &VLASizePair : VLASizes) 465 VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second; 466 PGO.assignRegionCounters(GlobalDecl(CD), F); 467 CapturedStmtInfo->EmitBody(*this, CD->getBody()); 468 FinishFunction(CD->getBodyRBrace()); 469 if (!NeedWrapperFunction) 470 return F; 471 472 FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, 473 /*RegisterCastedArgsOnly=*/true, 474 CapturedStmtInfo->getHelperName()); 475 CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); 476 Args.clear(); 477 LocalAddrs.clear(); 478 VLASizes.clear(); 479 llvm::Function *WrapperF = 480 emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, 481 WrapperCGF.CXXThisValue, WrapperFO); 482 LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); 483 llvm::SmallVector<llvm::Value *, 4> CallArgs; 484 for (const auto *Arg : Args) { 485 llvm::Value *CallArg; 486 auto I = LocalAddrs.find(Arg); 487 if (I != LocalAddrs.end()) { 488 LValue LV = 489 WrapperCGF.MakeAddrLValue(I->second.second, Arg->getType(), BaseInfo); 490 CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation()); 491 } else { 492 auto EI = VLASizes.find(Arg); 493 if (EI != VLASizes.end()) 494 CallArg = EI->second.second; 495 else { 496 LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg), 497 Arg->getType(), BaseInfo); 498 CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation()); 499 } 500 } 501 CallArgs.emplace_back(CallArg); 502 } 503 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(), 504 F, CallArgs); 505 WrapperCGF.FinishFunction(); 506 return WrapperF; 507 } 508 509 //===----------------------------------------------------------------------===// 510 // OpenMP Directive Emission 511 //===----------------------------------------------------------------------===// 512 void CodeGenFunction::EmitOMPAggregateAssign( 513 Address DestAddr, Address SrcAddr, QualType OriginalType, 514 const llvm::function_ref<void(Address, Address)> &CopyGen) { 515 // Perform element-by-element initialization. 516 QualType ElementTy; 517 518 // Drill down to the base element type on both arrays. 519 auto ArrayTy = OriginalType->getAsArrayTypeUnsafe(); 520 auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr); 521 SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 522 523 auto SrcBegin = SrcAddr.getPointer(); 524 auto DestBegin = DestAddr.getPointer(); 525 // Cast from pointer to array type to pointer to single element. 526 auto DestEnd = Builder.CreateGEP(DestBegin, NumElements); 527 // The basic structure here is a while-do loop. 528 auto BodyBB = createBasicBlock("omp.arraycpy.body"); 529 auto DoneBB = createBasicBlock("omp.arraycpy.done"); 530 auto IsEmpty = 531 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); 532 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 533 534 // Enter the loop body, making that address the current address. 535 auto EntryBB = Builder.GetInsertBlock(); 536 EmitBlock(BodyBB); 537 538 CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy); 539 540 llvm::PHINode *SrcElementPHI = 541 Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 542 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 543 Address SrcElementCurrent = 544 Address(SrcElementPHI, 545 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 546 547 llvm::PHINode *DestElementPHI = 548 Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 549 DestElementPHI->addIncoming(DestBegin, EntryBB); 550 Address DestElementCurrent = 551 Address(DestElementPHI, 552 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 553 554 // Emit copy. 555 CopyGen(DestElementCurrent, SrcElementCurrent); 556 557 // Shift the address forward by one element. 558 auto DestElementNext = Builder.CreateConstGEP1_32( 559 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 560 auto SrcElementNext = Builder.CreateConstGEP1_32( 561 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 562 // Check whether we've reached the end. 563 auto Done = 564 Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 565 Builder.CreateCondBr(Done, DoneBB, BodyBB); 566 DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock()); 567 SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock()); 568 569 // Done. 570 EmitBlock(DoneBB, /*IsFinished=*/true); 571 } 572 573 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, 574 Address SrcAddr, const VarDecl *DestVD, 575 const VarDecl *SrcVD, const Expr *Copy) { 576 if (OriginalType->isArrayType()) { 577 auto *BO = dyn_cast<BinaryOperator>(Copy); 578 if (BO && BO->getOpcode() == BO_Assign) { 579 // Perform simple memcpy for simple copying. 580 EmitAggregateAssign(DestAddr, SrcAddr, OriginalType); 581 } else { 582 // For arrays with complex element types perform element by element 583 // copying. 584 EmitOMPAggregateAssign( 585 DestAddr, SrcAddr, OriginalType, 586 [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) { 587 // Working with the single array element, so have to remap 588 // destination and source variables to corresponding array 589 // elements. 590 CodeGenFunction::OMPPrivateScope Remap(*this); 591 Remap.addPrivate(DestVD, [DestElement]() -> Address { 592 return DestElement; 593 }); 594 Remap.addPrivate( 595 SrcVD, [SrcElement]() -> Address { return SrcElement; }); 596 (void)Remap.Privatize(); 597 EmitIgnoredExpr(Copy); 598 }); 599 } 600 } else { 601 // Remap pseudo source variable to private copy. 602 CodeGenFunction::OMPPrivateScope Remap(*this); 603 Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; }); 604 Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; }); 605 (void)Remap.Privatize(); 606 // Emit copying of the whole variable. 607 EmitIgnoredExpr(Copy); 608 } 609 } 610 611 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, 612 OMPPrivateScope &PrivateScope) { 613 if (!HaveInsertPoint()) 614 return false; 615 bool FirstprivateIsLastprivate = false; 616 llvm::DenseSet<const VarDecl *> Lastprivates; 617 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 618 for (const auto *D : C->varlists()) 619 Lastprivates.insert( 620 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); 621 } 622 llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; 623 CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt())); 624 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { 625 auto IRef = C->varlist_begin(); 626 auto InitsRef = C->inits().begin(); 627 for (auto IInit : C->private_copies()) { 628 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 629 bool ThisFirstprivateIsLastprivate = 630 Lastprivates.count(OrigVD->getCanonicalDecl()) > 0; 631 auto *CapFD = CapturesInfo.lookup(OrigVD); 632 auto *FD = CapturedStmtInfo->lookup(OrigVD); 633 if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) && 634 !FD->getType()->isReferenceType()) { 635 EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); 636 ++IRef; 637 ++InitsRef; 638 continue; 639 } 640 FirstprivateIsLastprivate = 641 FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate; 642 if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { 643 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 644 auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); 645 bool IsRegistered; 646 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 647 /*RefersToEnclosingVariableOrCapture=*/FD != nullptr, 648 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 649 Address OriginalAddr = EmitLValue(&DRE).getAddress(); 650 QualType Type = VD->getType(); 651 if (Type->isArrayType()) { 652 // Emit VarDecl with copy init for arrays. 653 // Get the address of the original variable captured in current 654 // captured region. 655 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 656 auto Emission = EmitAutoVarAlloca(*VD); 657 auto *Init = VD->getInit(); 658 if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) { 659 // Perform simple memcpy. 660 EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr, 661 Type); 662 } else { 663 EmitOMPAggregateAssign( 664 Emission.getAllocatedAddress(), OriginalAddr, Type, 665 [this, VDInit, Init](Address DestElement, 666 Address SrcElement) { 667 // Clean up any temporaries needed by the initialization. 668 RunCleanupsScope InitScope(*this); 669 // Emit initialization for single element. 670 setAddrOfLocalVar(VDInit, SrcElement); 671 EmitAnyExprToMem(Init, DestElement, 672 Init->getType().getQualifiers(), 673 /*IsInitializer*/ false); 674 LocalDeclMap.erase(VDInit); 675 }); 676 } 677 EmitAutoVarCleanups(Emission); 678 return Emission.getAllocatedAddress(); 679 }); 680 } else { 681 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 682 // Emit private VarDecl with copy init. 683 // Remap temp VDInit variable to the address of the original 684 // variable 685 // (for proper handling of captured global variables). 686 setAddrOfLocalVar(VDInit, OriginalAddr); 687 EmitDecl(*VD); 688 LocalDeclMap.erase(VDInit); 689 return GetAddrOfLocalVar(VD); 690 }); 691 } 692 assert(IsRegistered && 693 "firstprivate var already registered as private"); 694 // Silence the warning about unused variable. 695 (void)IsRegistered; 696 } 697 ++IRef; 698 ++InitsRef; 699 } 700 } 701 return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty(); 702 } 703 704 void CodeGenFunction::EmitOMPPrivateClause( 705 const OMPExecutableDirective &D, 706 CodeGenFunction::OMPPrivateScope &PrivateScope) { 707 if (!HaveInsertPoint()) 708 return; 709 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 710 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) { 711 auto IRef = C->varlist_begin(); 712 for (auto IInit : C->private_copies()) { 713 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 714 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 715 auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 716 bool IsRegistered = 717 PrivateScope.addPrivate(OrigVD, [&]() -> Address { 718 // Emit private VarDecl with copy init. 719 EmitDecl(*VD); 720 return GetAddrOfLocalVar(VD); 721 }); 722 assert(IsRegistered && "private var already registered as private"); 723 // Silence the warning about unused variable. 724 (void)IsRegistered; 725 } 726 ++IRef; 727 } 728 } 729 } 730 731 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { 732 if (!HaveInsertPoint()) 733 return false; 734 // threadprivate_var1 = master_threadprivate_var1; 735 // operator=(threadprivate_var2, master_threadprivate_var2); 736 // ... 737 // __kmpc_barrier(&loc, global_tid); 738 llvm::DenseSet<const VarDecl *> CopiedVars; 739 llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; 740 for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) { 741 auto IRef = C->varlist_begin(); 742 auto ISrcRef = C->source_exprs().begin(); 743 auto IDestRef = C->destination_exprs().begin(); 744 for (auto *AssignOp : C->assignment_ops()) { 745 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 746 QualType Type = VD->getType(); 747 if (CopiedVars.insert(VD->getCanonicalDecl()).second) { 748 // Get the address of the master variable. If we are emitting code with 749 // TLS support, the address is passed from the master as field in the 750 // captured declaration. 751 Address MasterAddr = Address::invalid(); 752 if (getLangOpts().OpenMPUseTLS && 753 getContext().getTargetInfo().isTLSSupported()) { 754 assert(CapturedStmtInfo->lookup(VD) && 755 "Copyin threadprivates should have been captured!"); 756 DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(), 757 VK_LValue, (*IRef)->getExprLoc()); 758 MasterAddr = EmitLValue(&DRE).getAddress(); 759 LocalDeclMap.erase(VD); 760 } else { 761 MasterAddr = 762 Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) 763 : CGM.GetAddrOfGlobal(VD), 764 getContext().getDeclAlign(VD)); 765 } 766 // Get the address of the threadprivate variable. 767 Address PrivateAddr = EmitLValue(*IRef).getAddress(); 768 if (CopiedVars.size() == 1) { 769 // At first check if current thread is a master thread. If it is, no 770 // need to copy data. 771 CopyBegin = createBasicBlock("copyin.not.master"); 772 CopyEnd = createBasicBlock("copyin.not.master.end"); 773 Builder.CreateCondBr( 774 Builder.CreateICmpNE( 775 Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy), 776 Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)), 777 CopyBegin, CopyEnd); 778 EmitBlock(CopyBegin); 779 } 780 auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 781 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 782 EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp); 783 } 784 ++IRef; 785 ++ISrcRef; 786 ++IDestRef; 787 } 788 } 789 if (CopyEnd) { 790 // Exit out of copying procedure for non-master thread. 791 EmitBlock(CopyEnd, /*IsFinished=*/true); 792 return true; 793 } 794 return false; 795 } 796 797 bool CodeGenFunction::EmitOMPLastprivateClauseInit( 798 const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { 799 if (!HaveInsertPoint()) 800 return false; 801 bool HasAtLeastOneLastprivate = false; 802 llvm::DenseSet<const VarDecl *> SIMDLCVs; 803 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 804 auto *LoopDirective = cast<OMPLoopDirective>(&D); 805 for (auto *C : LoopDirective->counters()) { 806 SIMDLCVs.insert( 807 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 808 } 809 } 810 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; 811 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 812 HasAtLeastOneLastprivate = true; 813 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) 814 break; 815 auto IRef = C->varlist_begin(); 816 auto IDestRef = C->destination_exprs().begin(); 817 for (auto *IInit : C->private_copies()) { 818 // Keep the address of the original variable for future update at the end 819 // of the loop. 820 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 821 // Taskloops do not require additional initialization, it is done in 822 // runtime support library. 823 if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { 824 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 825 PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address { 826 DeclRefExpr DRE( 827 const_cast<VarDecl *>(OrigVD), 828 /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( 829 OrigVD) != nullptr, 830 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 831 return EmitLValue(&DRE).getAddress(); 832 }); 833 // Check if the variable is also a firstprivate: in this case IInit is 834 // not generated. Initialization of this variable will happen in codegen 835 // for 'firstprivate' clause. 836 if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) { 837 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 838 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 839 // Emit private VarDecl with copy init. 840 EmitDecl(*VD); 841 return GetAddrOfLocalVar(VD); 842 }); 843 assert(IsRegistered && 844 "lastprivate var already registered as private"); 845 (void)IsRegistered; 846 } 847 } 848 ++IRef; 849 ++IDestRef; 850 } 851 } 852 return HasAtLeastOneLastprivate; 853 } 854 855 void CodeGenFunction::EmitOMPLastprivateClauseFinal( 856 const OMPExecutableDirective &D, bool NoFinals, 857 llvm::Value *IsLastIterCond) { 858 if (!HaveInsertPoint()) 859 return; 860 // Emit following code: 861 // if (<IsLastIterCond>) { 862 // orig_var1 = private_orig_var1; 863 // ... 864 // orig_varn = private_orig_varn; 865 // } 866 llvm::BasicBlock *ThenBB = nullptr; 867 llvm::BasicBlock *DoneBB = nullptr; 868 if (IsLastIterCond) { 869 ThenBB = createBasicBlock(".omp.lastprivate.then"); 870 DoneBB = createBasicBlock(".omp.lastprivate.done"); 871 Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB); 872 EmitBlock(ThenBB); 873 } 874 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; 875 llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates; 876 if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) { 877 auto IC = LoopDirective->counters().begin(); 878 for (auto F : LoopDirective->finals()) { 879 auto *D = 880 cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl(); 881 if (NoFinals) 882 AlreadyEmittedVars.insert(D); 883 else 884 LoopCountersAndUpdates[D] = F; 885 ++IC; 886 } 887 } 888 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 889 auto IRef = C->varlist_begin(); 890 auto ISrcRef = C->source_exprs().begin(); 891 auto IDestRef = C->destination_exprs().begin(); 892 for (auto *AssignOp : C->assignment_ops()) { 893 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 894 QualType Type = PrivateVD->getType(); 895 auto *CanonicalVD = PrivateVD->getCanonicalDecl(); 896 if (AlreadyEmittedVars.insert(CanonicalVD).second) { 897 // If lastprivate variable is a loop control variable for loop-based 898 // directive, update its value before copyin back to original 899 // variable. 900 if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) 901 EmitIgnoredExpr(FinalExpr); 902 auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 903 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 904 // Get the address of the original variable. 905 Address OriginalAddr = GetAddrOfLocalVar(DestVD); 906 // Get the address of the private variable. 907 Address PrivateAddr = GetAddrOfLocalVar(PrivateVD); 908 if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>()) 909 PrivateAddr = 910 Address(Builder.CreateLoad(PrivateAddr), 911 getNaturalTypeAlignment(RefTy->getPointeeType())); 912 EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp); 913 } 914 ++IRef; 915 ++ISrcRef; 916 ++IDestRef; 917 } 918 if (auto *PostUpdate = C->getPostUpdateExpr()) 919 EmitIgnoredExpr(PostUpdate); 920 } 921 if (IsLastIterCond) 922 EmitBlock(DoneBB, /*IsFinished=*/true); 923 } 924 925 void CodeGenFunction::EmitOMPReductionClauseInit( 926 const OMPExecutableDirective &D, 927 CodeGenFunction::OMPPrivateScope &PrivateScope) { 928 if (!HaveInsertPoint()) 929 return; 930 SmallVector<const Expr *, 4> Shareds; 931 SmallVector<const Expr *, 4> Privates; 932 SmallVector<const Expr *, 4> ReductionOps; 933 SmallVector<const Expr *, 4> LHSs; 934 SmallVector<const Expr *, 4> RHSs; 935 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 936 auto IPriv = C->privates().begin(); 937 auto IRed = C->reduction_ops().begin(); 938 auto ILHS = C->lhs_exprs().begin(); 939 auto IRHS = C->rhs_exprs().begin(); 940 for (const auto *Ref : C->varlists()) { 941 Shareds.emplace_back(Ref); 942 Privates.emplace_back(*IPriv); 943 ReductionOps.emplace_back(*IRed); 944 LHSs.emplace_back(*ILHS); 945 RHSs.emplace_back(*IRHS); 946 std::advance(IPriv, 1); 947 std::advance(IRed, 1); 948 std::advance(ILHS, 1); 949 std::advance(IRHS, 1); 950 } 951 } 952 ReductionCodeGen RedCG(Shareds, Privates, ReductionOps); 953 unsigned Count = 0; 954 auto ILHS = LHSs.begin(); 955 auto IRHS = RHSs.begin(); 956 auto IPriv = Privates.begin(); 957 for (const auto *IRef : Shareds) { 958 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); 959 // Emit private VarDecl with reduction init. 960 RedCG.emitSharedLValue(*this, Count); 961 RedCG.emitAggregateType(*this, Count); 962 auto Emission = EmitAutoVarAlloca(*PrivateVD); 963 RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), 964 RedCG.getSharedLValue(Count), 965 [&Emission](CodeGenFunction &CGF) { 966 CGF.EmitAutoVarInit(Emission); 967 return true; 968 }); 969 EmitAutoVarCleanups(Emission); 970 Address BaseAddr = RedCG.adjustPrivateAddress( 971 *this, Count, Emission.getAllocatedAddress()); 972 bool IsRegistered = PrivateScope.addPrivate( 973 RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; }); 974 assert(IsRegistered && "private var already registered as private"); 975 // Silence the warning about unused variable. 976 (void)IsRegistered; 977 978 auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 979 auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 980 if (isa<OMPArraySectionExpr>(IRef)) { 981 // Store the address of the original variable associated with the LHS 982 // implicit variable. 983 PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { 984 return RedCG.getSharedLValue(Count).getAddress(); 985 }); 986 PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { 987 return GetAddrOfLocalVar(PrivateVD); 988 }); 989 } else if (isa<ArraySubscriptExpr>(IRef)) { 990 // Store the address of the original variable associated with the LHS 991 // implicit variable. 992 PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { 993 return RedCG.getSharedLValue(Count).getAddress(); 994 }); 995 PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { 996 return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD), 997 ConvertTypeForMem(RHSVD->getType()), 998 "rhs.begin"); 999 }); 1000 } else { 1001 QualType Type = PrivateVD->getType(); 1002 bool IsArray = getContext().getAsArrayType(Type) != nullptr; 1003 Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(); 1004 // Store the address of the original variable associated with the LHS 1005 // implicit variable. 1006 if (IsArray) { 1007 OriginalAddr = Builder.CreateElementBitCast( 1008 OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); 1009 } 1010 PrivateScope.addPrivate( 1011 LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; }); 1012 PrivateScope.addPrivate( 1013 RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address { 1014 return IsArray 1015 ? Builder.CreateElementBitCast( 1016 GetAddrOfLocalVar(PrivateVD), 1017 ConvertTypeForMem(RHSVD->getType()), "rhs.begin") 1018 : GetAddrOfLocalVar(PrivateVD); 1019 }); 1020 } 1021 ++ILHS; 1022 ++IRHS; 1023 ++IPriv; 1024 ++Count; 1025 } 1026 } 1027 1028 void CodeGenFunction::EmitOMPReductionClauseFinal( 1029 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) { 1030 if (!HaveInsertPoint()) 1031 return; 1032 llvm::SmallVector<const Expr *, 8> Privates; 1033 llvm::SmallVector<const Expr *, 8> LHSExprs; 1034 llvm::SmallVector<const Expr *, 8> RHSExprs; 1035 llvm::SmallVector<const Expr *, 8> ReductionOps; 1036 bool HasAtLeastOneReduction = false; 1037 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1038 HasAtLeastOneReduction = true; 1039 Privates.append(C->privates().begin(), C->privates().end()); 1040 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 1041 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 1042 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 1043 } 1044 if (HasAtLeastOneReduction) { 1045 bool WithNowait = D.getSingleClause<OMPNowaitClause>() || 1046 isOpenMPParallelDirective(D.getDirectiveKind()) || 1047 D.getDirectiveKind() == OMPD_simd; 1048 bool SimpleReduction = D.getDirectiveKind() == OMPD_simd; 1049 // Emit nowait reduction if nowait clause is present or directive is a 1050 // parallel directive (it always has implicit barrier). 1051 CGM.getOpenMPRuntime().emitReduction( 1052 *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps, 1053 {WithNowait, SimpleReduction, ReductionKind}); 1054 } 1055 } 1056 1057 static void emitPostUpdateForReductionClause( 1058 CodeGenFunction &CGF, const OMPExecutableDirective &D, 1059 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1060 if (!CGF.HaveInsertPoint()) 1061 return; 1062 llvm::BasicBlock *DoneBB = nullptr; 1063 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1064 if (auto *PostUpdate = C->getPostUpdateExpr()) { 1065 if (!DoneBB) { 1066 if (auto *Cond = CondGen(CGF)) { 1067 // If the first post-update expression is found, emit conditional 1068 // block if it was requested. 1069 auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu"); 1070 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done"); 1071 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1072 CGF.EmitBlock(ThenBB); 1073 } 1074 } 1075 CGF.EmitIgnoredExpr(PostUpdate); 1076 } 1077 } 1078 if (DoneBB) 1079 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 1080 } 1081 1082 namespace { 1083 /// Codegen lambda for appending distribute lower and upper bounds to outlined 1084 /// parallel function. This is necessary for combined constructs such as 1085 /// 'distribute parallel for' 1086 typedef llvm::function_ref<void(CodeGenFunction &, 1087 const OMPExecutableDirective &, 1088 llvm::SmallVectorImpl<llvm::Value *> &)> 1089 CodeGenBoundParametersTy; 1090 } // anonymous namespace 1091 1092 static void emitCommonOMPParallelDirective( 1093 CodeGenFunction &CGF, const OMPExecutableDirective &S, 1094 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1095 const CodeGenBoundParametersTy &CodeGenBoundParameters) { 1096 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); 1097 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( 1098 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 1099 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { 1100 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 1101 auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 1102 /*IgnoreResultAssign*/ true); 1103 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( 1104 CGF, NumThreads, NumThreadsClause->getLocStart()); 1105 } 1106 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { 1107 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); 1108 CGF.CGM.getOpenMPRuntime().emitProcBindClause( 1109 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart()); 1110 } 1111 const Expr *IfCond = nullptr; 1112 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 1113 if (C->getNameModifier() == OMPD_unknown || 1114 C->getNameModifier() == OMPD_parallel) { 1115 IfCond = C->getCondition(); 1116 break; 1117 } 1118 } 1119 1120 OMPParallelScope Scope(CGF, S); 1121 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 1122 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk 1123 // lower and upper bounds with the pragma 'for' chunking mechanism. 1124 // The following lambda takes care of appending the lower and upper bound 1125 // parameters when necessary 1126 CodeGenBoundParameters(CGF, S, CapturedVars); 1127 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 1128 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, 1129 CapturedVars, IfCond); 1130 } 1131 1132 static void emitEmptyBoundParameters(CodeGenFunction &, 1133 const OMPExecutableDirective &, 1134 llvm::SmallVectorImpl<llvm::Value *> &) {} 1135 1136 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { 1137 // Emit parallel region as a standalone region. 1138 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1139 OMPPrivateScope PrivateScope(CGF); 1140 bool Copyins = CGF.EmitOMPCopyinClause(S); 1141 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 1142 if (Copyins) { 1143 // Emit implicit barrier to synchronize threads and avoid data races on 1144 // propagation master's thread values of threadprivate variables to local 1145 // instances of that variables of all other implicit threads. 1146 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 1147 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 1148 /*ForceSimpleCall=*/true); 1149 } 1150 CGF.EmitOMPPrivateClause(S, PrivateScope); 1151 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 1152 (void)PrivateScope.Privatize(); 1153 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1154 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 1155 }; 1156 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, 1157 emitEmptyBoundParameters); 1158 emitPostUpdateForReductionClause( 1159 *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1160 } 1161 1162 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, 1163 JumpDest LoopExit) { 1164 RunCleanupsScope BodyScope(*this); 1165 // Update counters values on current iteration. 1166 for (auto I : D.updates()) { 1167 EmitIgnoredExpr(I); 1168 } 1169 // Update the linear variables. 1170 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1171 for (auto *U : C->updates()) 1172 EmitIgnoredExpr(U); 1173 } 1174 1175 // On a continue in the body, jump to the end. 1176 auto Continue = getJumpDestInCurrentScope("omp.body.continue"); 1177 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1178 // Emit loop body. 1179 EmitStmt(D.getBody()); 1180 // The end (updates/cleanups). 1181 EmitBlock(Continue.getBlock()); 1182 BreakContinueStack.pop_back(); 1183 } 1184 1185 void CodeGenFunction::EmitOMPInnerLoop( 1186 const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, 1187 const Expr *IncExpr, 1188 const llvm::function_ref<void(CodeGenFunction &)> &BodyGen, 1189 const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) { 1190 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); 1191 1192 // Start the loop with a block that tests the condition. 1193 auto CondBlock = createBasicBlock("omp.inner.for.cond"); 1194 EmitBlock(CondBlock); 1195 const SourceRange &R = S.getSourceRange(); 1196 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1197 SourceLocToDebugLoc(R.getEnd())); 1198 1199 // If there are any cleanups between here and the loop-exit scope, 1200 // create a block to stage a loop exit along. 1201 auto ExitBlock = LoopExit.getBlock(); 1202 if (RequiresCleanup) 1203 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); 1204 1205 auto LoopBody = createBasicBlock("omp.inner.for.body"); 1206 1207 // Emit condition. 1208 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); 1209 if (ExitBlock != LoopExit.getBlock()) { 1210 EmitBlock(ExitBlock); 1211 EmitBranchThroughCleanup(LoopExit); 1212 } 1213 1214 EmitBlock(LoopBody); 1215 incrementProfileCounter(&S); 1216 1217 // Create a block for the increment. 1218 auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); 1219 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1220 1221 BodyGen(*this); 1222 1223 // Emit "IV = IV + 1" and a back-edge to the condition block. 1224 EmitBlock(Continue.getBlock()); 1225 EmitIgnoredExpr(IncExpr); 1226 PostIncGen(*this); 1227 BreakContinueStack.pop_back(); 1228 EmitBranch(CondBlock); 1229 LoopStack.pop(); 1230 // Emit the fall-through block. 1231 EmitBlock(LoopExit.getBlock()); 1232 } 1233 1234 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { 1235 if (!HaveInsertPoint()) 1236 return false; 1237 // Emit inits for the linear variables. 1238 bool HasLinears = false; 1239 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1240 for (auto *Init : C->inits()) { 1241 HasLinears = true; 1242 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); 1243 if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { 1244 AutoVarEmission Emission = EmitAutoVarAlloca(*VD); 1245 auto *OrigVD = cast<VarDecl>(Ref->getDecl()); 1246 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 1247 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1248 VD->getInit()->getType(), VK_LValue, 1249 VD->getInit()->getExprLoc()); 1250 EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), 1251 VD->getType()), 1252 /*capturedByInit=*/false); 1253 EmitAutoVarCleanups(Emission); 1254 } else 1255 EmitVarDecl(*VD); 1256 } 1257 // Emit the linear steps for the linear clauses. 1258 // If a step is not constant, it is pre-calculated before the loop. 1259 if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep())) 1260 if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) { 1261 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); 1262 // Emit calculation of the linear step. 1263 EmitIgnoredExpr(CS); 1264 } 1265 } 1266 return HasLinears; 1267 } 1268 1269 void CodeGenFunction::EmitOMPLinearClauseFinal( 1270 const OMPLoopDirective &D, 1271 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1272 if (!HaveInsertPoint()) 1273 return; 1274 llvm::BasicBlock *DoneBB = nullptr; 1275 // Emit the final values of the linear variables. 1276 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1277 auto IC = C->varlist_begin(); 1278 for (auto *F : C->finals()) { 1279 if (!DoneBB) { 1280 if (auto *Cond = CondGen(*this)) { 1281 // If the first post-update expression is found, emit conditional 1282 // block if it was requested. 1283 auto *ThenBB = createBasicBlock(".omp.linear.pu"); 1284 DoneBB = createBasicBlock(".omp.linear.pu.done"); 1285 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1286 EmitBlock(ThenBB); 1287 } 1288 } 1289 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); 1290 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 1291 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1292 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); 1293 Address OrigAddr = EmitLValue(&DRE).getAddress(); 1294 CodeGenFunction::OMPPrivateScope VarScope(*this); 1295 VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; }); 1296 (void)VarScope.Privatize(); 1297 EmitIgnoredExpr(F); 1298 ++IC; 1299 } 1300 if (auto *PostUpdate = C->getPostUpdateExpr()) 1301 EmitIgnoredExpr(PostUpdate); 1302 } 1303 if (DoneBB) 1304 EmitBlock(DoneBB, /*IsFinished=*/true); 1305 } 1306 1307 static void emitAlignedClause(CodeGenFunction &CGF, 1308 const OMPExecutableDirective &D) { 1309 if (!CGF.HaveInsertPoint()) 1310 return; 1311 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { 1312 unsigned ClauseAlignment = 0; 1313 if (auto AlignmentExpr = Clause->getAlignment()) { 1314 auto AlignmentCI = 1315 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); 1316 ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue()); 1317 } 1318 for (auto E : Clause->varlists()) { 1319 unsigned Alignment = ClauseAlignment; 1320 if (Alignment == 0) { 1321 // OpenMP [2.8.1, Description] 1322 // If no optional parameter is specified, implementation-defined default 1323 // alignments for SIMD instructions on the target platforms are assumed. 1324 Alignment = 1325 CGF.getContext() 1326 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( 1327 E->getType()->getPointeeType())) 1328 .getQuantity(); 1329 } 1330 assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) && 1331 "alignment is not power of 2"); 1332 if (Alignment != 0) { 1333 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 1334 CGF.EmitAlignmentAssumption(PtrValue, Alignment); 1335 } 1336 } 1337 } 1338 } 1339 1340 void CodeGenFunction::EmitOMPPrivateLoopCounters( 1341 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { 1342 if (!HaveInsertPoint()) 1343 return; 1344 auto I = S.private_counters().begin(); 1345 for (auto *E : S.counters()) { 1346 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1347 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 1348 (void)LoopScope.addPrivate(VD, [&]() -> Address { 1349 // Emit var without initialization. 1350 if (!LocalDeclMap.count(PrivateVD)) { 1351 auto VarEmission = EmitAutoVarAlloca(*PrivateVD); 1352 EmitAutoVarCleanups(VarEmission); 1353 } 1354 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), 1355 /*RefersToEnclosingVariableOrCapture=*/false, 1356 (*I)->getType(), VK_LValue, (*I)->getExprLoc()); 1357 return EmitLValue(&DRE).getAddress(); 1358 }); 1359 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || 1360 VD->hasGlobalStorage()) { 1361 (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address { 1362 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 1363 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), 1364 E->getType(), VK_LValue, E->getExprLoc()); 1365 return EmitLValue(&DRE).getAddress(); 1366 }); 1367 } 1368 ++I; 1369 } 1370 } 1371 1372 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 1373 const Expr *Cond, llvm::BasicBlock *TrueBlock, 1374 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 1375 if (!CGF.HaveInsertPoint()) 1376 return; 1377 { 1378 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 1379 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); 1380 (void)PreCondScope.Privatize(); 1381 // Get initial values of real counters. 1382 for (auto I : S.inits()) { 1383 CGF.EmitIgnoredExpr(I); 1384 } 1385 } 1386 // Check that loop is executed at least one time. 1387 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); 1388 } 1389 1390 void CodeGenFunction::EmitOMPLinearClause( 1391 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { 1392 if (!HaveInsertPoint()) 1393 return; 1394 llvm::DenseSet<const VarDecl *> SIMDLCVs; 1395 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 1396 auto *LoopDirective = cast<OMPLoopDirective>(&D); 1397 for (auto *C : LoopDirective->counters()) { 1398 SIMDLCVs.insert( 1399 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 1400 } 1401 } 1402 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1403 auto CurPrivate = C->privates().begin(); 1404 for (auto *E : C->varlists()) { 1405 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1406 auto *PrivateVD = 1407 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); 1408 if (!SIMDLCVs.count(VD->getCanonicalDecl())) { 1409 bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address { 1410 // Emit private VarDecl with copy init. 1411 EmitVarDecl(*PrivateVD); 1412 return GetAddrOfLocalVar(PrivateVD); 1413 }); 1414 assert(IsRegistered && "linear var already registered as private"); 1415 // Silence the warning about unused variable. 1416 (void)IsRegistered; 1417 } else 1418 EmitVarDecl(*PrivateVD); 1419 ++CurPrivate; 1420 } 1421 } 1422 } 1423 1424 static void emitSimdlenSafelenClause(CodeGenFunction &CGF, 1425 const OMPExecutableDirective &D, 1426 bool IsMonotonic) { 1427 if (!CGF.HaveInsertPoint()) 1428 return; 1429 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { 1430 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 1431 /*ignoreResult=*/true); 1432 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1433 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1434 // In presence of finite 'safelen', it may be unsafe to mark all 1435 // the memory instructions parallel, because loop-carried 1436 // dependences of 'safelen' iterations are possible. 1437 if (!IsMonotonic) 1438 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); 1439 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { 1440 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), 1441 /*ignoreResult=*/true); 1442 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1443 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1444 // In presence of finite 'safelen', it may be unsafe to mark all 1445 // the memory instructions parallel, because loop-carried 1446 // dependences of 'safelen' iterations are possible. 1447 CGF.LoopStack.setParallel(false); 1448 } 1449 } 1450 1451 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, 1452 bool IsMonotonic) { 1453 // Walk clauses and process safelen/lastprivate. 1454 LoopStack.setParallel(!IsMonotonic); 1455 LoopStack.setVectorizeEnable(true); 1456 emitSimdlenSafelenClause(*this, D, IsMonotonic); 1457 } 1458 1459 void CodeGenFunction::EmitOMPSimdFinal( 1460 const OMPLoopDirective &D, 1461 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1462 if (!HaveInsertPoint()) 1463 return; 1464 llvm::BasicBlock *DoneBB = nullptr; 1465 auto IC = D.counters().begin(); 1466 auto IPC = D.private_counters().begin(); 1467 for (auto F : D.finals()) { 1468 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); 1469 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl()); 1470 auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD); 1471 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) || 1472 OrigVD->hasGlobalStorage() || CED) { 1473 if (!DoneBB) { 1474 if (auto *Cond = CondGen(*this)) { 1475 // If the first post-update expression is found, emit conditional 1476 // block if it was requested. 1477 auto *ThenBB = createBasicBlock(".omp.final.then"); 1478 DoneBB = createBasicBlock(".omp.final.done"); 1479 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1480 EmitBlock(ThenBB); 1481 } 1482 } 1483 Address OrigAddr = Address::invalid(); 1484 if (CED) 1485 OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); 1486 else { 1487 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), 1488 /*RefersToEnclosingVariableOrCapture=*/false, 1489 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); 1490 OrigAddr = EmitLValue(&DRE).getAddress(); 1491 } 1492 OMPPrivateScope VarScope(*this); 1493 VarScope.addPrivate(OrigVD, 1494 [OrigAddr]() -> Address { return OrigAddr; }); 1495 (void)VarScope.Privatize(); 1496 EmitIgnoredExpr(F); 1497 } 1498 ++IC; 1499 ++IPC; 1500 } 1501 if (DoneBB) 1502 EmitBlock(DoneBB, /*IsFinished=*/true); 1503 } 1504 1505 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, 1506 const OMPLoopDirective &S, 1507 CodeGenFunction::JumpDest LoopExit) { 1508 CGF.EmitOMPLoopBody(S, LoopExit); 1509 CGF.EmitStopPoint(&S); 1510 } 1511 1512 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 1513 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1514 OMPLoopScope PreInitScope(CGF, S); 1515 // if (PreCond) { 1516 // for (IV in 0..LastIteration) BODY; 1517 // <Final counter/linear vars updates>; 1518 // } 1519 // 1520 1521 // Emit: if (PreCond) - begin. 1522 // If the condition constant folds and can be elided, avoid emitting the 1523 // whole loop. 1524 bool CondConstant; 1525 llvm::BasicBlock *ContBlock = nullptr; 1526 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1527 if (!CondConstant) 1528 return; 1529 } else { 1530 auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); 1531 ContBlock = CGF.createBasicBlock("simd.if.end"); 1532 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 1533 CGF.getProfileCount(&S)); 1534 CGF.EmitBlock(ThenBlock); 1535 CGF.incrementProfileCounter(&S); 1536 } 1537 1538 // Emit the loop iteration variable. 1539 const Expr *IVExpr = S.getIterationVariable(); 1540 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 1541 CGF.EmitVarDecl(*IVDecl); 1542 CGF.EmitIgnoredExpr(S.getInit()); 1543 1544 // Emit the iterations count variable. 1545 // If it is not a variable, Sema decided to calculate iterations count on 1546 // each iteration (e.g., it is foldable into a constant). 1547 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1548 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1549 // Emit calculation of the iterations count. 1550 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 1551 } 1552 1553 CGF.EmitOMPSimdInit(S); 1554 1555 emitAlignedClause(CGF, S); 1556 (void)CGF.EmitOMPLinearClauseInit(S); 1557 { 1558 OMPPrivateScope LoopScope(CGF); 1559 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 1560 CGF.EmitOMPLinearClause(S, LoopScope); 1561 CGF.EmitOMPPrivateClause(S, LoopScope); 1562 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1563 bool HasLastprivateClause = 1564 CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1565 (void)LoopScope.Privatize(); 1566 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1567 S.getInc(), 1568 [&S](CodeGenFunction &CGF) { 1569 CGF.EmitOMPLoopBody(S, JumpDest()); 1570 CGF.EmitStopPoint(&S); 1571 }, 1572 [](CodeGenFunction &) {}); 1573 CGF.EmitOMPSimdFinal( 1574 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1575 // Emit final copy of the lastprivate variables at the end of loops. 1576 if (HasLastprivateClause) 1577 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 1578 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); 1579 emitPostUpdateForReductionClause( 1580 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1581 } 1582 CGF.EmitOMPLinearClauseFinal( 1583 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1584 // Emit: if (PreCond) - end. 1585 if (ContBlock) { 1586 CGF.EmitBranch(ContBlock); 1587 CGF.EmitBlock(ContBlock, true); 1588 } 1589 }; 1590 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1591 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1592 } 1593 1594 void CodeGenFunction::EmitOMPOuterLoop( 1595 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 1596 CodeGenFunction::OMPPrivateScope &LoopScope, 1597 const CodeGenFunction::OMPLoopArguments &LoopArgs, 1598 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 1599 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { 1600 auto &RT = CGM.getOpenMPRuntime(); 1601 1602 const Expr *IVExpr = S.getIterationVariable(); 1603 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1604 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1605 1606 auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 1607 1608 // Start the loop with a block that tests the condition. 1609 auto CondBlock = createBasicBlock("omp.dispatch.cond"); 1610 EmitBlock(CondBlock); 1611 const SourceRange &R = S.getSourceRange(); 1612 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1613 SourceLocToDebugLoc(R.getEnd())); 1614 1615 llvm::Value *BoolCondVal = nullptr; 1616 if (!DynamicOrOrdered) { 1617 // UB = min(UB, GlobalUB) or 1618 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 1619 // 'distribute parallel for') 1620 EmitIgnoredExpr(LoopArgs.EUB); 1621 // IV = LB 1622 EmitIgnoredExpr(LoopArgs.Init); 1623 // IV < UB 1624 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); 1625 } else { 1626 BoolCondVal = 1627 RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL, 1628 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); 1629 } 1630 1631 // If there are any cleanups between here and the loop-exit scope, 1632 // create a block to stage a loop exit along. 1633 auto ExitBlock = LoopExit.getBlock(); 1634 if (LoopScope.requiresCleanups()) 1635 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 1636 1637 auto LoopBody = createBasicBlock("omp.dispatch.body"); 1638 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 1639 if (ExitBlock != LoopExit.getBlock()) { 1640 EmitBlock(ExitBlock); 1641 EmitBranchThroughCleanup(LoopExit); 1642 } 1643 EmitBlock(LoopBody); 1644 1645 // Emit "IV = LB" (in case of static schedule, we have already calculated new 1646 // LB for loop condition and emitted it above). 1647 if (DynamicOrOrdered) 1648 EmitIgnoredExpr(LoopArgs.Init); 1649 1650 // Create a block for the increment. 1651 auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 1652 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1653 1654 // Generate !llvm.loop.parallel metadata for loads and stores for loops 1655 // with dynamic/guided scheduling and without ordered clause. 1656 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 1657 LoopStack.setParallel(!IsMonotonic); 1658 else 1659 EmitOMPSimdInit(S, IsMonotonic); 1660 1661 SourceLocation Loc = S.getLocStart(); 1662 1663 // when 'distribute' is not combined with a 'for': 1664 // while (idx <= UB) { BODY; ++idx; } 1665 // when 'distribute' is combined with a 'for' 1666 // (e.g. 'distribute parallel for') 1667 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 1668 EmitOMPInnerLoop( 1669 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, 1670 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 1671 CodeGenLoop(CGF, S, LoopExit); 1672 }, 1673 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { 1674 CodeGenOrdered(CGF, Loc, IVSize, IVSigned); 1675 }); 1676 1677 EmitBlock(Continue.getBlock()); 1678 BreakContinueStack.pop_back(); 1679 if (!DynamicOrOrdered) { 1680 // Emit "LB = LB + Stride", "UB = UB + Stride". 1681 EmitIgnoredExpr(LoopArgs.NextLB); 1682 EmitIgnoredExpr(LoopArgs.NextUB); 1683 } 1684 1685 EmitBranch(CondBlock); 1686 LoopStack.pop(); 1687 // Emit the fall-through block. 1688 EmitBlock(LoopExit.getBlock()); 1689 1690 // Tell the runtime we are done. 1691 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { 1692 if (!DynamicOrOrdered) 1693 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 1694 }; 1695 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 1696 } 1697 1698 void CodeGenFunction::EmitOMPForOuterLoop( 1699 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 1700 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1701 const OMPLoopArguments &LoopArgs, 1702 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 1703 auto &RT = CGM.getOpenMPRuntime(); 1704 1705 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 1706 const bool DynamicOrOrdered = 1707 Ordered || RT.isDynamic(ScheduleKind.Schedule); 1708 1709 assert((Ordered || 1710 !RT.isStaticNonchunked(ScheduleKind.Schedule, 1711 LoopArgs.Chunk != nullptr)) && 1712 "static non-chunked schedule does not need outer loop"); 1713 1714 // Emit outer loop. 1715 // 1716 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1717 // When schedule(dynamic,chunk_size) is specified, the iterations are 1718 // distributed to threads in the team in chunks as the threads request them. 1719 // Each thread executes a chunk of iterations, then requests another chunk, 1720 // until no chunks remain to be distributed. Each chunk contains chunk_size 1721 // iterations, except for the last chunk to be distributed, which may have 1722 // fewer iterations. When no chunk_size is specified, it defaults to 1. 1723 // 1724 // When schedule(guided,chunk_size) is specified, the iterations are assigned 1725 // to threads in the team in chunks as the executing threads request them. 1726 // Each thread executes a chunk of iterations, then requests another chunk, 1727 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 1728 // each chunk is proportional to the number of unassigned iterations divided 1729 // by the number of threads in the team, decreasing to 1. For a chunk_size 1730 // with value k (greater than 1), the size of each chunk is determined in the 1731 // same way, with the restriction that the chunks do not contain fewer than k 1732 // iterations (except for the last chunk to be assigned, which may have fewer 1733 // than k iterations). 1734 // 1735 // When schedule(auto) is specified, the decision regarding scheduling is 1736 // delegated to the compiler and/or runtime system. The programmer gives the 1737 // implementation the freedom to choose any possible mapping of iterations to 1738 // threads in the team. 1739 // 1740 // When schedule(runtime) is specified, the decision regarding scheduling is 1741 // deferred until run time, and the schedule and chunk size are taken from the 1742 // run-sched-var ICV. If the ICV is set to auto, the schedule is 1743 // implementation defined 1744 // 1745 // while(__kmpc_dispatch_next(&LB, &UB)) { 1746 // idx = LB; 1747 // while (idx <= UB) { BODY; ++idx; 1748 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. 1749 // } // inner loop 1750 // } 1751 // 1752 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1753 // When schedule(static, chunk_size) is specified, iterations are divided into 1754 // chunks of size chunk_size, and the chunks are assigned to the threads in 1755 // the team in a round-robin fashion in the order of the thread number. 1756 // 1757 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { 1758 // while (idx <= UB) { BODY; ++idx; } // inner loop 1759 // LB = LB + ST; 1760 // UB = UB + ST; 1761 // } 1762 // 1763 1764 const Expr *IVExpr = S.getIterationVariable(); 1765 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1766 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1767 1768 if (DynamicOrOrdered) { 1769 auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); 1770 llvm::Value *LBVal = DispatchBounds.first; 1771 llvm::Value *UBVal = DispatchBounds.second; 1772 CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal, 1773 LoopArgs.Chunk}; 1774 RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize, 1775 IVSigned, Ordered, DipatchRTInputValues); 1776 } else { 1777 CGOpenMPRuntime::StaticRTInput StaticInit( 1778 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, 1779 LoopArgs.ST, LoopArgs.Chunk); 1780 RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(), 1781 ScheduleKind, StaticInit); 1782 } 1783 1784 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, 1785 const unsigned IVSize, 1786 const bool IVSigned) { 1787 if (Ordered) { 1788 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, 1789 IVSigned); 1790 } 1791 }; 1792 1793 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, 1794 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); 1795 OuterLoopArgs.IncExpr = S.getInc(); 1796 OuterLoopArgs.Init = S.getInit(); 1797 OuterLoopArgs.Cond = S.getCond(); 1798 OuterLoopArgs.NextLB = S.getNextLowerBound(); 1799 OuterLoopArgs.NextUB = S.getNextUpperBound(); 1800 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs, 1801 emitOMPLoopBodyWithStopPoint, CodeGenOrdered); 1802 } 1803 1804 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, 1805 const unsigned IVSize, const bool IVSigned) {} 1806 1807 void CodeGenFunction::EmitOMPDistributeOuterLoop( 1808 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, 1809 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, 1810 const CodeGenLoopTy &CodeGenLoopContent) { 1811 1812 auto &RT = CGM.getOpenMPRuntime(); 1813 1814 // Emit outer loop. 1815 // Same behavior as a OMPForOuterLoop, except that schedule cannot be 1816 // dynamic 1817 // 1818 1819 const Expr *IVExpr = S.getIterationVariable(); 1820 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1821 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1822 1823 CGOpenMPRuntime::StaticRTInput StaticInit( 1824 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB, 1825 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk); 1826 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit); 1827 1828 // for combined 'distribute' and 'for' the increment expression of distribute 1829 // is store in DistInc. For 'distribute' alone, it is in Inc. 1830 Expr *IncExpr; 1831 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) 1832 IncExpr = S.getDistInc(); 1833 else 1834 IncExpr = S.getInc(); 1835 1836 // this routine is shared by 'omp distribute parallel for' and 1837 // 'omp distribute': select the right EUB expression depending on the 1838 // directive 1839 OMPLoopArguments OuterLoopArgs; 1840 OuterLoopArgs.LB = LoopArgs.LB; 1841 OuterLoopArgs.UB = LoopArgs.UB; 1842 OuterLoopArgs.ST = LoopArgs.ST; 1843 OuterLoopArgs.IL = LoopArgs.IL; 1844 OuterLoopArgs.Chunk = LoopArgs.Chunk; 1845 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1846 ? S.getCombinedEnsureUpperBound() 1847 : S.getEnsureUpperBound(); 1848 OuterLoopArgs.IncExpr = IncExpr; 1849 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1850 ? S.getCombinedInit() 1851 : S.getInit(); 1852 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1853 ? S.getCombinedCond() 1854 : S.getCond(); 1855 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1856 ? S.getCombinedNextLowerBound() 1857 : S.getNextLowerBound(); 1858 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1859 ? S.getCombinedNextUpperBound() 1860 : S.getNextUpperBound(); 1861 1862 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, 1863 LoopScope, OuterLoopArgs, CodeGenLoopContent, 1864 emitEmptyOrdered); 1865 } 1866 1867 /// Emit a helper variable and return corresponding lvalue. 1868 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 1869 const DeclRefExpr *Helper) { 1870 auto VDecl = cast<VarDecl>(Helper->getDecl()); 1871 CGF.EmitVarDecl(*VDecl); 1872 return CGF.EmitLValue(Helper); 1873 } 1874 1875 static std::pair<LValue, LValue> 1876 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, 1877 const OMPExecutableDirective &S) { 1878 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 1879 LValue LB = 1880 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 1881 LValue UB = 1882 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 1883 1884 // When composing 'distribute' with 'for' (e.g. as in 'distribute 1885 // parallel for') we need to use the 'distribute' 1886 // chunk lower and upper bounds rather than the whole loop iteration 1887 // space. These are parameters to the outlined function for 'parallel' 1888 // and we copy the bounds of the previous schedule into the 1889 // the current ones. 1890 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable()); 1891 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable()); 1892 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation()); 1893 PrevLBVal = CGF.EmitScalarConversion( 1894 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(), 1895 LS.getIterationVariable()->getType(), SourceLocation()); 1896 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation()); 1897 PrevUBVal = CGF.EmitScalarConversion( 1898 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(), 1899 LS.getIterationVariable()->getType(), SourceLocation()); 1900 1901 CGF.EmitStoreOfScalar(PrevLBVal, LB); 1902 CGF.EmitStoreOfScalar(PrevUBVal, UB); 1903 1904 return {LB, UB}; 1905 } 1906 1907 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then 1908 /// we need to use the LB and UB expressions generated by the worksharing 1909 /// code generation support, whereas in non combined situations we would 1910 /// just emit 0 and the LastIteration expression 1911 /// This function is necessary due to the difference of the LB and UB 1912 /// types for the RT emission routines for 'for_static_init' and 1913 /// 'for_dispatch_init' 1914 static std::pair<llvm::Value *, llvm::Value *> 1915 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, 1916 const OMPExecutableDirective &S, 1917 Address LB, Address UB) { 1918 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 1919 const Expr *IVExpr = LS.getIterationVariable(); 1920 // when implementing a dynamic schedule for a 'for' combined with a 1921 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop 1922 // is not normalized as each team only executes its own assigned 1923 // distribute chunk 1924 QualType IteratorTy = IVExpr->getType(); 1925 llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, 1926 SourceLocation()); 1927 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, 1928 SourceLocation()); 1929 return {LBVal, UBVal}; 1930 } 1931 1932 static void emitDistributeParallelForDistributeInnerBoundParams( 1933 CodeGenFunction &CGF, const OMPExecutableDirective &S, 1934 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { 1935 const auto &Dir = cast<OMPLoopDirective>(S); 1936 LValue LB = 1937 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); 1938 auto LBCast = CGF.Builder.CreateIntCast( 1939 CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false); 1940 CapturedVars.push_back(LBCast); 1941 LValue UB = 1942 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); 1943 1944 auto UBCast = CGF.Builder.CreateIntCast( 1945 CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false); 1946 CapturedVars.push_back(UBCast); 1947 } 1948 1949 static void 1950 emitInnerParallelForWhenCombined(CodeGenFunction &CGF, 1951 const OMPLoopDirective &S, 1952 CodeGenFunction::JumpDest LoopExit) { 1953 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, 1954 PrePostActionTy &) { 1955 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), 1956 emitDistributeParallelForInnerBounds, 1957 emitDistributeParallelForDispatchBounds); 1958 }; 1959 1960 emitCommonOMPParallelDirective( 1961 CGF, S, OMPD_for, CGInlinedWorksharingLoop, 1962 emitDistributeParallelForDistributeInnerBoundParams); 1963 } 1964 1965 void CodeGenFunction::EmitOMPDistributeParallelForDirective( 1966 const OMPDistributeParallelForDirective &S) { 1967 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1968 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 1969 S.getDistInc()); 1970 }; 1971 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1972 OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for, 1973 /*HasCancel=*/false); 1974 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 1975 /*HasCancel=*/false); 1976 } 1977 1978 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( 1979 const OMPDistributeParallelForSimdDirective &S) { 1980 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1981 CGM.getOpenMPRuntime().emitInlinedDirective( 1982 *this, OMPD_distribute_parallel_for_simd, 1983 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1984 OMPLoopScope PreInitScope(CGF, S); 1985 CGF.EmitStmt( 1986 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1987 }); 1988 } 1989 1990 void CodeGenFunction::EmitOMPDistributeSimdDirective( 1991 const OMPDistributeSimdDirective &S) { 1992 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1993 CGM.getOpenMPRuntime().emitInlinedDirective( 1994 *this, OMPD_distribute_simd, 1995 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1996 OMPLoopScope PreInitScope(CGF, S); 1997 CGF.EmitStmt( 1998 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1999 }); 2000 } 2001 2002 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( 2003 const OMPTargetParallelForSimdDirective &S) { 2004 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2005 CGM.getOpenMPRuntime().emitInlinedDirective( 2006 *this, OMPD_target_parallel_for_simd, 2007 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2008 OMPLoopScope PreInitScope(CGF, S); 2009 CGF.EmitStmt( 2010 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2011 }); 2012 } 2013 2014 void CodeGenFunction::EmitOMPTargetSimdDirective( 2015 const OMPTargetSimdDirective &S) { 2016 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2017 CGM.getOpenMPRuntime().emitInlinedDirective( 2018 *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2019 OMPLoopScope PreInitScope(CGF, S); 2020 CGF.EmitStmt( 2021 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2022 }); 2023 } 2024 2025 void CodeGenFunction::EmitOMPTeamsDistributeDirective( 2026 const OMPTeamsDistributeDirective &S) { 2027 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2028 CGM.getOpenMPRuntime().emitInlinedDirective( 2029 *this, OMPD_teams_distribute, 2030 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2031 OMPLoopScope PreInitScope(CGF, S); 2032 CGF.EmitStmt( 2033 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2034 }); 2035 } 2036 2037 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( 2038 const OMPTeamsDistributeSimdDirective &S) { 2039 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2040 CGM.getOpenMPRuntime().emitInlinedDirective( 2041 *this, OMPD_teams_distribute_simd, 2042 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2043 OMPLoopScope PreInitScope(CGF, S); 2044 CGF.EmitStmt( 2045 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2046 }); 2047 } 2048 2049 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( 2050 const OMPTeamsDistributeParallelForSimdDirective &S) { 2051 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2052 CGM.getOpenMPRuntime().emitInlinedDirective( 2053 *this, OMPD_teams_distribute_parallel_for_simd, 2054 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2055 OMPLoopScope PreInitScope(CGF, S); 2056 CGF.EmitStmt( 2057 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2058 }); 2059 } 2060 2061 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( 2062 const OMPTeamsDistributeParallelForDirective &S) { 2063 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2064 CGM.getOpenMPRuntime().emitInlinedDirective( 2065 *this, OMPD_teams_distribute_parallel_for, 2066 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2067 OMPLoopScope PreInitScope(CGF, S); 2068 CGF.EmitStmt( 2069 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2070 }); 2071 } 2072 2073 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( 2074 const OMPTargetTeamsDistributeDirective &S) { 2075 CGM.getOpenMPRuntime().emitInlinedDirective( 2076 *this, OMPD_target_teams_distribute, 2077 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2078 CGF.EmitStmt( 2079 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2080 }); 2081 } 2082 2083 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( 2084 const OMPTargetTeamsDistributeParallelForDirective &S) { 2085 CGM.getOpenMPRuntime().emitInlinedDirective( 2086 *this, OMPD_target_teams_distribute_parallel_for, 2087 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2088 CGF.EmitStmt( 2089 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2090 }); 2091 } 2092 2093 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( 2094 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 2095 CGM.getOpenMPRuntime().emitInlinedDirective( 2096 *this, OMPD_target_teams_distribute_parallel_for_simd, 2097 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2098 CGF.EmitStmt( 2099 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2100 }); 2101 } 2102 2103 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( 2104 const OMPTargetTeamsDistributeSimdDirective &S) { 2105 CGM.getOpenMPRuntime().emitInlinedDirective( 2106 *this, OMPD_target_teams_distribute_simd, 2107 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2108 CGF.EmitStmt( 2109 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2110 }); 2111 } 2112 2113 namespace { 2114 struct ScheduleKindModifiersTy { 2115 OpenMPScheduleClauseKind Kind; 2116 OpenMPScheduleClauseModifier M1; 2117 OpenMPScheduleClauseModifier M2; 2118 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, 2119 OpenMPScheduleClauseModifier M1, 2120 OpenMPScheduleClauseModifier M2) 2121 : Kind(Kind), M1(M1), M2(M2) {} 2122 }; 2123 } // namespace 2124 2125 bool CodeGenFunction::EmitOMPWorksharingLoop( 2126 const OMPLoopDirective &S, Expr *EUB, 2127 const CodeGenLoopBoundsTy &CodeGenLoopBounds, 2128 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 2129 // Emit the loop iteration variable. 2130 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2131 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2132 EmitVarDecl(*IVDecl); 2133 2134 // Emit the iterations count variable. 2135 // If it is not a variable, Sema decided to calculate iterations count on each 2136 // iteration (e.g., it is foldable into a constant). 2137 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2138 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2139 // Emit calculation of the iterations count. 2140 EmitIgnoredExpr(S.getCalcLastIteration()); 2141 } 2142 2143 auto &RT = CGM.getOpenMPRuntime(); 2144 2145 bool HasLastprivateClause; 2146 // Check pre-condition. 2147 { 2148 OMPLoopScope PreInitScope(*this, S); 2149 // Skip the entire loop if we don't meet the precondition. 2150 // If the condition constant folds and can be elided, avoid emitting the 2151 // whole loop. 2152 bool CondConstant; 2153 llvm::BasicBlock *ContBlock = nullptr; 2154 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2155 if (!CondConstant) 2156 return false; 2157 } else { 2158 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2159 ContBlock = createBasicBlock("omp.precond.end"); 2160 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2161 getProfileCount(&S)); 2162 EmitBlock(ThenBlock); 2163 incrementProfileCounter(&S); 2164 } 2165 2166 bool Ordered = false; 2167 if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 2168 if (OrderedClause->getNumForLoops()) 2169 RT.emitDoacrossInit(*this, S); 2170 else 2171 Ordered = true; 2172 } 2173 2174 llvm::DenseSet<const Expr *> EmittedFinals; 2175 emitAlignedClause(*this, S); 2176 bool HasLinears = EmitOMPLinearClauseInit(S); 2177 // Emit helper vars inits. 2178 2179 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); 2180 LValue LB = Bounds.first; 2181 LValue UB = Bounds.second; 2182 LValue ST = 2183 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2184 LValue IL = 2185 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2186 2187 // Emit 'then' code. 2188 { 2189 OMPPrivateScope LoopScope(*this); 2190 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) { 2191 // Emit implicit barrier to synchronize threads and avoid data races on 2192 // initialization of firstprivate variables and post-update of 2193 // lastprivate variables. 2194 CGM.getOpenMPRuntime().emitBarrierCall( 2195 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2196 /*ForceSimpleCall=*/true); 2197 } 2198 EmitOMPPrivateClause(S, LoopScope); 2199 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2200 EmitOMPReductionClauseInit(S, LoopScope); 2201 EmitOMPPrivateLoopCounters(S, LoopScope); 2202 EmitOMPLinearClause(S, LoopScope); 2203 (void)LoopScope.Privatize(); 2204 2205 // Detect the loop schedule kind and chunk. 2206 llvm::Value *Chunk = nullptr; 2207 OpenMPScheduleTy ScheduleKind; 2208 if (auto *C = S.getSingleClause<OMPScheduleClause>()) { 2209 ScheduleKind.Schedule = C->getScheduleKind(); 2210 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2211 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2212 if (const auto *Ch = C->getChunkSize()) { 2213 Chunk = EmitScalarExpr(Ch); 2214 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2215 S.getIterationVariable()->getType(), 2216 S.getLocStart()); 2217 } 2218 } 2219 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2220 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2221 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2222 // If the static schedule kind is specified or if the ordered clause is 2223 // specified, and if no monotonic modifier is specified, the effect will 2224 // be as if the monotonic modifier was specified. 2225 if (RT.isStaticNonchunked(ScheduleKind.Schedule, 2226 /* Chunked */ Chunk != nullptr) && 2227 !Ordered) { 2228 if (isOpenMPSimdDirective(S.getDirectiveKind())) 2229 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2230 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2231 // When no chunk_size is specified, the iteration space is divided into 2232 // chunks that are approximately equal in size, and at most one chunk is 2233 // distributed to each thread. Note that the size of the chunks is 2234 // unspecified in this case. 2235 CGOpenMPRuntime::StaticRTInput StaticInit( 2236 IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(), 2237 UB.getAddress(), ST.getAddress()); 2238 RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(), 2239 ScheduleKind, StaticInit); 2240 auto LoopExit = 2241 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2242 // UB = min(UB, GlobalUB); 2243 EmitIgnoredExpr(S.getEnsureUpperBound()); 2244 // IV = LB; 2245 EmitIgnoredExpr(S.getInit()); 2246 // while (idx <= UB) { BODY; ++idx; } 2247 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 2248 S.getInc(), 2249 [&S, LoopExit](CodeGenFunction &CGF) { 2250 CGF.EmitOMPLoopBody(S, LoopExit); 2251 CGF.EmitStopPoint(&S); 2252 }, 2253 [](CodeGenFunction &) {}); 2254 EmitBlock(LoopExit.getBlock()); 2255 // Tell the runtime we are done. 2256 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2257 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 2258 }; 2259 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2260 } else { 2261 const bool IsMonotonic = 2262 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || 2263 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || 2264 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 2265 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 2266 // Emit the outer loop, which requests its work chunk [LB..UB] from 2267 // runtime and runs the inner loop to process it. 2268 const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), 2269 ST.getAddress(), IL.getAddress(), 2270 Chunk, EUB); 2271 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 2272 LoopArguments, CGDispatchBounds); 2273 } 2274 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2275 EmitOMPSimdFinal(S, 2276 [&](CodeGenFunction &CGF) -> llvm::Value * { 2277 return CGF.Builder.CreateIsNotNull( 2278 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2279 }); 2280 } 2281 EmitOMPReductionClauseFinal( 2282 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 2283 ? /*Parallel and Simd*/ OMPD_parallel_for_simd 2284 : /*Parallel only*/ OMPD_parallel); 2285 // Emit post-update of the reduction variables if IsLastIter != 0. 2286 emitPostUpdateForReductionClause( 2287 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2288 return CGF.Builder.CreateIsNotNull( 2289 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2290 }); 2291 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2292 if (HasLastprivateClause) 2293 EmitOMPLastprivateClauseFinal( 2294 S, isOpenMPSimdDirective(S.getDirectiveKind()), 2295 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 2296 } 2297 EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2298 return CGF.Builder.CreateIsNotNull( 2299 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2300 }); 2301 // We're now done with the loop, so jump to the continuation block. 2302 if (ContBlock) { 2303 EmitBranch(ContBlock); 2304 EmitBlock(ContBlock, true); 2305 } 2306 } 2307 return HasLastprivateClause; 2308 } 2309 2310 /// The following two functions generate expressions for the loop lower 2311 /// and upper bounds in case of static and dynamic (dispatch) schedule 2312 /// of the associated 'for' or 'distribute' loop. 2313 static std::pair<LValue, LValue> 2314 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 2315 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2316 LValue LB = 2317 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2318 LValue UB = 2319 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2320 return {LB, UB}; 2321 } 2322 2323 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not 2324 /// consider the lower and upper bound expressions generated by the 2325 /// worksharing loop support, but we use 0 and the iteration space size as 2326 /// constants 2327 static std::pair<llvm::Value *, llvm::Value *> 2328 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, 2329 Address LB, Address UB) { 2330 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2331 const Expr *IVExpr = LS.getIterationVariable(); 2332 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); 2333 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); 2334 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); 2335 return {LBVal, UBVal}; 2336 } 2337 2338 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 2339 bool HasLastprivates = false; 2340 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2341 PrePostActionTy &) { 2342 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 2343 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2344 emitForLoopBounds, 2345 emitDispatchForLoopBounds); 2346 }; 2347 { 2348 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2349 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 2350 S.hasCancel()); 2351 } 2352 2353 // Emit an implicit barrier at the end. 2354 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2355 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2356 } 2357 } 2358 2359 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 2360 bool HasLastprivates = false; 2361 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2362 PrePostActionTy &) { 2363 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2364 emitForLoopBounds, 2365 emitDispatchForLoopBounds); 2366 }; 2367 { 2368 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2369 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2370 } 2371 2372 // Emit an implicit barrier at the end. 2373 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2374 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2375 } 2376 } 2377 2378 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 2379 const Twine &Name, 2380 llvm::Value *Init = nullptr) { 2381 auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 2382 if (Init) 2383 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); 2384 return LVal; 2385 } 2386 2387 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 2388 auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); 2389 auto *CS = dyn_cast<CompoundStmt>(Stmt); 2390 bool HasLastprivates = false; 2391 auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF, 2392 PrePostActionTy &) { 2393 auto &C = CGF.CGM.getContext(); 2394 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2395 // Emit helper vars inits. 2396 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 2397 CGF.Builder.getInt32(0)); 2398 auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1) 2399 : CGF.Builder.getInt32(0); 2400 LValue UB = 2401 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 2402 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 2403 CGF.Builder.getInt32(1)); 2404 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 2405 CGF.Builder.getInt32(0)); 2406 // Loop counter. 2407 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 2408 OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2409 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 2410 OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2411 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 2412 // Generate condition for loop. 2413 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 2414 OK_Ordinary, S.getLocStart(), FPOptions()); 2415 // Increment for loop counter. 2416 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 2417 S.getLocStart()); 2418 auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) { 2419 // Iterate through all sections and emit a switch construct: 2420 // switch (IV) { 2421 // case 0: 2422 // <SectionStmt[0]>; 2423 // break; 2424 // ... 2425 // case <NumSection> - 1: 2426 // <SectionStmt[<NumSection> - 1]>; 2427 // break; 2428 // } 2429 // .omp.sections.exit: 2430 auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 2431 auto *SwitchStmt = CGF.Builder.CreateSwitch( 2432 CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, 2433 CS == nullptr ? 1 : CS->size()); 2434 if (CS) { 2435 unsigned CaseNumber = 0; 2436 for (auto *SubStmt : CS->children()) { 2437 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2438 CGF.EmitBlock(CaseBB); 2439 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 2440 CGF.EmitStmt(SubStmt); 2441 CGF.EmitBranch(ExitBB); 2442 ++CaseNumber; 2443 } 2444 } else { 2445 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2446 CGF.EmitBlock(CaseBB); 2447 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 2448 CGF.EmitStmt(Stmt); 2449 CGF.EmitBranch(ExitBB); 2450 } 2451 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 2452 }; 2453 2454 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2455 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 2456 // Emit implicit barrier to synchronize threads and avoid data races on 2457 // initialization of firstprivate variables and post-update of lastprivate 2458 // variables. 2459 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 2460 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2461 /*ForceSimpleCall=*/true); 2462 } 2463 CGF.EmitOMPPrivateClause(S, LoopScope); 2464 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2465 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2466 (void)LoopScope.Privatize(); 2467 2468 // Emit static non-chunked loop. 2469 OpenMPScheduleTy ScheduleKind; 2470 ScheduleKind.Schedule = OMPC_SCHEDULE_static; 2471 CGOpenMPRuntime::StaticRTInput StaticInit( 2472 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), 2473 LB.getAddress(), UB.getAddress(), ST.getAddress()); 2474 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 2475 CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit); 2476 // UB = min(UB, GlobalUB); 2477 auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); 2478 auto *MinUBGlobalUB = CGF.Builder.CreateSelect( 2479 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 2480 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 2481 // IV = LB; 2482 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); 2483 // while (idx <= UB) { BODY; ++idx; } 2484 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, 2485 [](CodeGenFunction &) {}); 2486 // Tell the runtime we are done. 2487 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2488 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 2489 }; 2490 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); 2491 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 2492 // Emit post-update of the reduction variables if IsLastIter != 0. 2493 emitPostUpdateForReductionClause( 2494 CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2495 return CGF.Builder.CreateIsNotNull( 2496 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2497 }); 2498 2499 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2500 if (HasLastprivates) 2501 CGF.EmitOMPLastprivateClauseFinal( 2502 S, /*NoFinals=*/false, 2503 CGF.Builder.CreateIsNotNull( 2504 CGF.EmitLoadOfScalar(IL, S.getLocStart()))); 2505 }; 2506 2507 bool HasCancel = false; 2508 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 2509 HasCancel = OSD->hasCancel(); 2510 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 2511 HasCancel = OPSD->hasCancel(); 2512 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); 2513 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 2514 HasCancel); 2515 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 2516 // clause. Otherwise the barrier will be generated by the codegen for the 2517 // directive. 2518 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 2519 // Emit implicit barrier to synchronize threads and avoid data races on 2520 // initialization of firstprivate variables. 2521 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2522 OMPD_unknown); 2523 } 2524 } 2525 2526 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 2527 { 2528 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2529 EmitSections(S); 2530 } 2531 // Emit an implicit barrier at the end. 2532 if (!S.getSingleClause<OMPNowaitClause>()) { 2533 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2534 OMPD_sections); 2535 } 2536 } 2537 2538 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 2539 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2540 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2541 }; 2542 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2543 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 2544 S.hasCancel()); 2545 } 2546 2547 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 2548 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 2549 llvm::SmallVector<const Expr *, 8> DestExprs; 2550 llvm::SmallVector<const Expr *, 8> SrcExprs; 2551 llvm::SmallVector<const Expr *, 8> AssignmentOps; 2552 // Check if there are any 'copyprivate' clauses associated with this 2553 // 'single' construct. 2554 // Build a list of copyprivate variables along with helper expressions 2555 // (<source>, <destination>, <destination>=<source> expressions) 2556 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 2557 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 2558 DestExprs.append(C->destination_exprs().begin(), 2559 C->destination_exprs().end()); 2560 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 2561 AssignmentOps.append(C->assignment_ops().begin(), 2562 C->assignment_ops().end()); 2563 } 2564 // Emit code for 'single' region along with 'copyprivate' clauses 2565 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2566 Action.Enter(CGF); 2567 OMPPrivateScope SingleScope(CGF); 2568 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 2569 CGF.EmitOMPPrivateClause(S, SingleScope); 2570 (void)SingleScope.Privatize(); 2571 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2572 }; 2573 { 2574 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2575 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 2576 CopyprivateVars, DestExprs, 2577 SrcExprs, AssignmentOps); 2578 } 2579 // Emit an implicit barrier at the end (to avoid data race on firstprivate 2580 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 2581 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 2582 CGM.getOpenMPRuntime().emitBarrierCall( 2583 *this, S.getLocStart(), 2584 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); 2585 } 2586 } 2587 2588 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 2589 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2590 Action.Enter(CGF); 2591 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2592 }; 2593 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2594 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 2595 } 2596 2597 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 2598 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2599 Action.Enter(CGF); 2600 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2601 }; 2602 Expr *Hint = nullptr; 2603 if (auto *HintClause = S.getSingleClause<OMPHintClause>()) 2604 Hint = HintClause->getHint(); 2605 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2606 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 2607 S.getDirectiveName().getAsString(), 2608 CodeGen, S.getLocStart(), Hint); 2609 } 2610 2611 void CodeGenFunction::EmitOMPParallelForDirective( 2612 const OMPParallelForDirective &S) { 2613 // Emit directive as a combined directive that consists of two implicit 2614 // directives: 'parallel' with 'for' directive. 2615 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2616 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); 2617 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2618 emitDispatchForLoopBounds); 2619 }; 2620 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, 2621 emitEmptyBoundParameters); 2622 } 2623 2624 void CodeGenFunction::EmitOMPParallelForSimdDirective( 2625 const OMPParallelForSimdDirective &S) { 2626 // Emit directive as a combined directive that consists of two implicit 2627 // directives: 'parallel' with 'for' directive. 2628 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2629 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2630 emitDispatchForLoopBounds); 2631 }; 2632 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, 2633 emitEmptyBoundParameters); 2634 } 2635 2636 void CodeGenFunction::EmitOMPParallelSectionsDirective( 2637 const OMPParallelSectionsDirective &S) { 2638 // Emit directive as a combined directive that consists of two implicit 2639 // directives: 'parallel' with 'sections' directive. 2640 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2641 CGF.EmitSections(S); 2642 }; 2643 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, 2644 emitEmptyBoundParameters); 2645 } 2646 2647 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, 2648 const RegionCodeGenTy &BodyGen, 2649 const TaskGenTy &TaskGen, 2650 OMPTaskDataTy &Data) { 2651 // Emit outlined function for task construct. 2652 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2653 auto *I = CS->getCapturedDecl()->param_begin(); 2654 auto *PartId = std::next(I); 2655 auto *TaskT = std::next(I, 4); 2656 // Check if the task is final 2657 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 2658 // If the condition constant folds and can be elided, try to avoid emitting 2659 // the condition and the dead arm of the if/else. 2660 auto *Cond = Clause->getCondition(); 2661 bool CondConstant; 2662 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 2663 Data.Final.setInt(CondConstant); 2664 else 2665 Data.Final.setPointer(EvaluateExprAsBool(Cond)); 2666 } else { 2667 // By default the task is not final. 2668 Data.Final.setInt(/*IntVal=*/false); 2669 } 2670 // Check if the task has 'priority' clause. 2671 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 2672 auto *Prio = Clause->getPriority(); 2673 Data.Priority.setInt(/*IntVal=*/true); 2674 Data.Priority.setPointer(EmitScalarConversion( 2675 EmitScalarExpr(Prio), Prio->getType(), 2676 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 2677 Prio->getExprLoc())); 2678 } 2679 // The first function argument for tasks is a thread id, the second one is a 2680 // part id (0 for tied tasks, >=0 for untied task). 2681 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 2682 // Get list of private variables. 2683 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 2684 auto IRef = C->varlist_begin(); 2685 for (auto *IInit : C->private_copies()) { 2686 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2687 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2688 Data.PrivateVars.push_back(*IRef); 2689 Data.PrivateCopies.push_back(IInit); 2690 } 2691 ++IRef; 2692 } 2693 } 2694 EmittedAsPrivate.clear(); 2695 // Get list of firstprivate variables. 2696 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 2697 auto IRef = C->varlist_begin(); 2698 auto IElemInitRef = C->inits().begin(); 2699 for (auto *IInit : C->private_copies()) { 2700 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2701 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2702 Data.FirstprivateVars.push_back(*IRef); 2703 Data.FirstprivateCopies.push_back(IInit); 2704 Data.FirstprivateInits.push_back(*IElemInitRef); 2705 } 2706 ++IRef; 2707 ++IElemInitRef; 2708 } 2709 } 2710 // Get list of lastprivate variables (for taskloops). 2711 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 2712 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 2713 auto IRef = C->varlist_begin(); 2714 auto ID = C->destination_exprs().begin(); 2715 for (auto *IInit : C->private_copies()) { 2716 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2717 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2718 Data.LastprivateVars.push_back(*IRef); 2719 Data.LastprivateCopies.push_back(IInit); 2720 } 2721 LastprivateDstsOrigs.insert( 2722 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 2723 cast<DeclRefExpr>(*IRef)}); 2724 ++IRef; 2725 ++ID; 2726 } 2727 } 2728 SmallVector<const Expr *, 4> LHSs; 2729 SmallVector<const Expr *, 4> RHSs; 2730 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 2731 auto IPriv = C->privates().begin(); 2732 auto IRed = C->reduction_ops().begin(); 2733 auto ILHS = C->lhs_exprs().begin(); 2734 auto IRHS = C->rhs_exprs().begin(); 2735 for (const auto *Ref : C->varlists()) { 2736 Data.ReductionVars.emplace_back(Ref); 2737 Data.ReductionCopies.emplace_back(*IPriv); 2738 Data.ReductionOps.emplace_back(*IRed); 2739 LHSs.emplace_back(*ILHS); 2740 RHSs.emplace_back(*IRHS); 2741 std::advance(IPriv, 1); 2742 std::advance(IRed, 1); 2743 std::advance(ILHS, 1); 2744 std::advance(IRHS, 1); 2745 } 2746 } 2747 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( 2748 *this, S.getLocStart(), LHSs, RHSs, Data); 2749 // Build list of dependences. 2750 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) 2751 for (auto *IRef : C->varlists()) 2752 Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); 2753 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs]( 2754 CodeGenFunction &CGF, PrePostActionTy &Action) { 2755 // Set proper addresses for generated private copies. 2756 OMPPrivateScope Scope(CGF); 2757 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || 2758 !Data.LastprivateVars.empty()) { 2759 enum { PrivatesParam = 2, CopyFnParam = 3 }; 2760 auto *CopyFn = CGF.Builder.CreateLoad( 2761 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); 2762 auto *PrivatesPtr = CGF.Builder.CreateLoad( 2763 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); 2764 // Map privates. 2765 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 2766 llvm::SmallVector<llvm::Value *, 16> CallArgs; 2767 CallArgs.push_back(PrivatesPtr); 2768 for (auto *E : Data.PrivateVars) { 2769 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2770 Address PrivatePtr = CGF.CreateMemTemp( 2771 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); 2772 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2773 CallArgs.push_back(PrivatePtr.getPointer()); 2774 } 2775 for (auto *E : Data.FirstprivateVars) { 2776 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2777 Address PrivatePtr = 2778 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 2779 ".firstpriv.ptr.addr"); 2780 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2781 CallArgs.push_back(PrivatePtr.getPointer()); 2782 } 2783 for (auto *E : Data.LastprivateVars) { 2784 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2785 Address PrivatePtr = 2786 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 2787 ".lastpriv.ptr.addr"); 2788 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2789 CallArgs.push_back(PrivatePtr.getPointer()); 2790 } 2791 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), 2792 CopyFn, CallArgs); 2793 for (auto &&Pair : LastprivateDstsOrigs) { 2794 auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); 2795 DeclRefExpr DRE( 2796 const_cast<VarDecl *>(OrigVD), 2797 /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup( 2798 OrigVD) != nullptr, 2799 Pair.second->getType(), VK_LValue, Pair.second->getExprLoc()); 2800 Scope.addPrivate(Pair.first, [&CGF, &DRE]() { 2801 return CGF.EmitLValue(&DRE).getAddress(); 2802 }); 2803 } 2804 for (auto &&Pair : PrivatePtrs) { 2805 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 2806 CGF.getContext().getDeclAlign(Pair.first)); 2807 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 2808 } 2809 } 2810 if (Data.Reductions) { 2811 OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true); 2812 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies, 2813 Data.ReductionOps); 2814 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( 2815 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9))); 2816 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { 2817 RedCG.emitSharedLValue(CGF, Cnt); 2818 RedCG.emitAggregateType(CGF, Cnt); 2819 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( 2820 CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); 2821 Replacement = 2822 Address(CGF.EmitScalarConversion( 2823 Replacement.getPointer(), CGF.getContext().VoidPtrTy, 2824 CGF.getContext().getPointerType( 2825 Data.ReductionCopies[Cnt]->getType()), 2826 SourceLocation()), 2827 Replacement.getAlignment()); 2828 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); 2829 Scope.addPrivate(RedCG.getBaseDecl(Cnt), 2830 [Replacement]() { return Replacement; }); 2831 // FIXME: This must removed once the runtime library is fixed. 2832 // Emit required threadprivate variables for 2833 // initilizer/combiner/finalizer. 2834 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), 2835 RedCG, Cnt); 2836 } 2837 } 2838 // Privatize all private variables except for in_reduction items. 2839 (void)Scope.Privatize(); 2840 SmallVector<const Expr *, 4> InRedVars; 2841 SmallVector<const Expr *, 4> InRedPrivs; 2842 SmallVector<const Expr *, 4> InRedOps; 2843 SmallVector<const Expr *, 4> TaskgroupDescriptors; 2844 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { 2845 auto IPriv = C->privates().begin(); 2846 auto IRed = C->reduction_ops().begin(); 2847 auto ITD = C->taskgroup_descriptors().begin(); 2848 for (const auto *Ref : C->varlists()) { 2849 InRedVars.emplace_back(Ref); 2850 InRedPrivs.emplace_back(*IPriv); 2851 InRedOps.emplace_back(*IRed); 2852 TaskgroupDescriptors.emplace_back(*ITD); 2853 std::advance(IPriv, 1); 2854 std::advance(IRed, 1); 2855 std::advance(ITD, 1); 2856 } 2857 } 2858 // Privatize in_reduction items here, because taskgroup descriptors must be 2859 // privatized earlier. 2860 OMPPrivateScope InRedScope(CGF); 2861 if (!InRedVars.empty()) { 2862 ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps); 2863 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { 2864 RedCG.emitSharedLValue(CGF, Cnt); 2865 RedCG.emitAggregateType(CGF, Cnt); 2866 // The taskgroup descriptor variable is always implicit firstprivate and 2867 // privatized already during procoessing of the firstprivates. 2868 llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar( 2869 CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation()); 2870 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( 2871 CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); 2872 Replacement = Address( 2873 CGF.EmitScalarConversion( 2874 Replacement.getPointer(), CGF.getContext().VoidPtrTy, 2875 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), 2876 SourceLocation()), 2877 Replacement.getAlignment()); 2878 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); 2879 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), 2880 [Replacement]() { return Replacement; }); 2881 // FIXME: This must removed once the runtime library is fixed. 2882 // Emit required threadprivate variables for 2883 // initilizer/combiner/finalizer. 2884 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), 2885 RedCG, Cnt); 2886 } 2887 } 2888 (void)InRedScope.Privatize(); 2889 2890 Action.Enter(CGF); 2891 BodyGen(CGF); 2892 }; 2893 auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 2894 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, 2895 Data.NumberOfParts); 2896 OMPLexicalScope Scope(*this, S); 2897 TaskGen(*this, OutlinedFn, Data); 2898 } 2899 2900 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 2901 // Emit outlined function for task construct. 2902 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2903 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 2904 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 2905 const Expr *IfCond = nullptr; 2906 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2907 if (C->getNameModifier() == OMPD_unknown || 2908 C->getNameModifier() == OMPD_task) { 2909 IfCond = C->getCondition(); 2910 break; 2911 } 2912 } 2913 2914 OMPTaskDataTy Data; 2915 // Check if we should emit tied or untied task. 2916 Data.Tied = !S.getSingleClause<OMPUntiedClause>(); 2917 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 2918 CGF.EmitStmt(CS->getCapturedStmt()); 2919 }; 2920 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 2921 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 2922 const OMPTaskDataTy &Data) { 2923 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn, 2924 SharedsTy, CapturedStruct, IfCond, 2925 Data); 2926 }; 2927 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 2928 } 2929 2930 void CodeGenFunction::EmitOMPTaskyieldDirective( 2931 const OMPTaskyieldDirective &S) { 2932 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); 2933 } 2934 2935 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 2936 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); 2937 } 2938 2939 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 2940 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart()); 2941 } 2942 2943 void CodeGenFunction::EmitOMPTaskgroupDirective( 2944 const OMPTaskgroupDirective &S) { 2945 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2946 Action.Enter(CGF); 2947 if (const Expr *E = S.getReductionRef()) { 2948 SmallVector<const Expr *, 4> LHSs; 2949 SmallVector<const Expr *, 4> RHSs; 2950 OMPTaskDataTy Data; 2951 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { 2952 auto IPriv = C->privates().begin(); 2953 auto IRed = C->reduction_ops().begin(); 2954 auto ILHS = C->lhs_exprs().begin(); 2955 auto IRHS = C->rhs_exprs().begin(); 2956 for (const auto *Ref : C->varlists()) { 2957 Data.ReductionVars.emplace_back(Ref); 2958 Data.ReductionCopies.emplace_back(*IPriv); 2959 Data.ReductionOps.emplace_back(*IRed); 2960 LHSs.emplace_back(*ILHS); 2961 RHSs.emplace_back(*IRHS); 2962 std::advance(IPriv, 1); 2963 std::advance(IRed, 1); 2964 std::advance(ILHS, 1); 2965 std::advance(IRHS, 1); 2966 } 2967 } 2968 llvm::Value *ReductionDesc = 2969 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(), 2970 LHSs, RHSs, Data); 2971 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2972 CGF.EmitVarDecl(*VD); 2973 CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD), 2974 /*Volatile=*/false, E->getType()); 2975 } 2976 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2977 }; 2978 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2979 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); 2980 } 2981 2982 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 2983 CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { 2984 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) { 2985 return llvm::makeArrayRef(FlushClause->varlist_begin(), 2986 FlushClause->varlist_end()); 2987 } 2988 return llvm::None; 2989 }(), S.getLocStart()); 2990 } 2991 2992 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, 2993 const CodeGenLoopTy &CodeGenLoop, 2994 Expr *IncExpr) { 2995 // Emit the loop iteration variable. 2996 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2997 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2998 EmitVarDecl(*IVDecl); 2999 3000 // Emit the iterations count variable. 3001 // If it is not a variable, Sema decided to calculate iterations count on each 3002 // iteration (e.g., it is foldable into a constant). 3003 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 3004 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 3005 // Emit calculation of the iterations count. 3006 EmitIgnoredExpr(S.getCalcLastIteration()); 3007 } 3008 3009 auto &RT = CGM.getOpenMPRuntime(); 3010 3011 bool HasLastprivateClause = false; 3012 // Check pre-condition. 3013 { 3014 OMPLoopScope PreInitScope(*this, S); 3015 // Skip the entire loop if we don't meet the precondition. 3016 // If the condition constant folds and can be elided, avoid emitting the 3017 // whole loop. 3018 bool CondConstant; 3019 llvm::BasicBlock *ContBlock = nullptr; 3020 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 3021 if (!CondConstant) 3022 return; 3023 } else { 3024 auto *ThenBlock = createBasicBlock("omp.precond.then"); 3025 ContBlock = createBasicBlock("omp.precond.end"); 3026 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 3027 getProfileCount(&S)); 3028 EmitBlock(ThenBlock); 3029 incrementProfileCounter(&S); 3030 } 3031 3032 // Emit 'then' code. 3033 { 3034 // Emit helper vars inits. 3035 3036 LValue LB = EmitOMPHelperVar( 3037 *this, cast<DeclRefExpr>( 3038 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3039 ? S.getCombinedLowerBoundVariable() 3040 : S.getLowerBoundVariable()))); 3041 LValue UB = EmitOMPHelperVar( 3042 *this, cast<DeclRefExpr>( 3043 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3044 ? S.getCombinedUpperBoundVariable() 3045 : S.getUpperBoundVariable()))); 3046 LValue ST = 3047 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 3048 LValue IL = 3049 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 3050 3051 OMPPrivateScope LoopScope(*this); 3052 if (EmitOMPFirstprivateClause(S, LoopScope)) { 3053 // Emit implicit barrier to synchronize threads and avoid data races on 3054 // initialization of firstprivate variables and post-update of 3055 // lastprivate variables. 3056 CGM.getOpenMPRuntime().emitBarrierCall( 3057 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 3058 /*ForceSimpleCall=*/true); 3059 } 3060 EmitOMPPrivateClause(S, LoopScope); 3061 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 3062 EmitOMPPrivateLoopCounters(S, LoopScope); 3063 (void)LoopScope.Privatize(); 3064 3065 // Detect the distribute schedule kind and chunk. 3066 llvm::Value *Chunk = nullptr; 3067 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; 3068 if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) { 3069 ScheduleKind = C->getDistScheduleKind(); 3070 if (const auto *Ch = C->getChunkSize()) { 3071 Chunk = EmitScalarExpr(Ch); 3072 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 3073 S.getIterationVariable()->getType(), 3074 S.getLocStart()); 3075 } 3076 } 3077 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 3078 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 3079 3080 // OpenMP [2.10.8, distribute Construct, Description] 3081 // If dist_schedule is specified, kind must be static. If specified, 3082 // iterations are divided into chunks of size chunk_size, chunks are 3083 // assigned to the teams of the league in a round-robin fashion in the 3084 // order of the team number. When no chunk_size is specified, the 3085 // iteration space is divided into chunks that are approximately equal 3086 // in size, and at most one chunk is distributed to each team of the 3087 // league. The size of the chunks is unspecified in this case. 3088 if (RT.isStaticNonchunked(ScheduleKind, 3089 /* Chunked */ Chunk != nullptr)) { 3090 CGOpenMPRuntime::StaticRTInput StaticInit( 3091 IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(), 3092 LB.getAddress(), UB.getAddress(), ST.getAddress()); 3093 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, 3094 StaticInit); 3095 auto LoopExit = 3096 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 3097 // UB = min(UB, GlobalUB); 3098 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3099 ? S.getCombinedEnsureUpperBound() 3100 : S.getEnsureUpperBound()); 3101 // IV = LB; 3102 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3103 ? S.getCombinedInit() 3104 : S.getInit()); 3105 3106 Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3107 ? S.getCombinedCond() 3108 : S.getCond(); 3109 3110 // for distribute alone, codegen 3111 // while (idx <= UB) { BODY; ++idx; } 3112 // when combined with 'for' (e.g. as in 'distribute parallel for') 3113 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 3114 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr, 3115 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 3116 CodeGenLoop(CGF, S, LoopExit); 3117 }, 3118 [](CodeGenFunction &) {}); 3119 EmitBlock(LoopExit.getBlock()); 3120 // Tell the runtime we are done. 3121 RT.emitForStaticFinish(*this, S.getLocStart()); 3122 } else { 3123 // Emit the outer loop, which requests its work chunk [LB..UB] from 3124 // runtime and runs the inner loop to process it. 3125 const OMPLoopArguments LoopArguments = { 3126 LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), 3127 Chunk}; 3128 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, 3129 CodeGenLoop); 3130 } 3131 3132 // Emit final copy of the lastprivate variables if IsLastIter != 0. 3133 if (HasLastprivateClause) 3134 EmitOMPLastprivateClauseFinal( 3135 S, /*NoFinals=*/false, 3136 Builder.CreateIsNotNull( 3137 EmitLoadOfScalar(IL, S.getLocStart()))); 3138 } 3139 3140 // We're now done with the loop, so jump to the continuation block. 3141 if (ContBlock) { 3142 EmitBranch(ContBlock); 3143 EmitBlock(ContBlock, true); 3144 } 3145 } 3146 } 3147 3148 void CodeGenFunction::EmitOMPDistributeDirective( 3149 const OMPDistributeDirective &S) { 3150 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3151 3152 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 3153 }; 3154 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3155 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 3156 false); 3157 } 3158 3159 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 3160 const CapturedStmt *S) { 3161 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 3162 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 3163 CGF.CapturedStmtInfo = &CapStmtInfo; 3164 auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); 3165 Fn->addFnAttr(llvm::Attribute::NoInline); 3166 return Fn; 3167 } 3168 3169 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 3170 if (!S.getAssociatedStmt()) { 3171 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) 3172 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); 3173 return; 3174 } 3175 auto *C = S.getSingleClause<OMPSIMDClause>(); 3176 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, 3177 PrePostActionTy &Action) { 3178 if (C) { 3179 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 3180 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3181 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3182 auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); 3183 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), 3184 OutlinedFn, CapturedVars); 3185 } else { 3186 Action.Enter(CGF); 3187 CGF.EmitStmt( 3188 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3189 } 3190 }; 3191 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3192 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); 3193 } 3194 3195 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 3196 QualType SrcType, QualType DestType, 3197 SourceLocation Loc) { 3198 assert(CGF.hasScalarEvaluationKind(DestType) && 3199 "DestType must have scalar evaluation kind."); 3200 assert(!Val.isAggregate() && "Must be a scalar or complex."); 3201 return Val.isScalar() 3202 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, 3203 Loc) 3204 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 3205 DestType, Loc); 3206 } 3207 3208 static CodeGenFunction::ComplexPairTy 3209 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 3210 QualType DestType, SourceLocation Loc) { 3211 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 3212 "DestType must have complex evaluation kind."); 3213 CodeGenFunction::ComplexPairTy ComplexVal; 3214 if (Val.isScalar()) { 3215 // Convert the input element to the element type of the complex. 3216 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 3217 auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 3218 DestElementType, Loc); 3219 ComplexVal = CodeGenFunction::ComplexPairTy( 3220 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 3221 } else { 3222 assert(Val.isComplex() && "Must be a scalar or complex."); 3223 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 3224 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 3225 ComplexVal.first = CGF.EmitScalarConversion( 3226 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 3227 ComplexVal.second = CGF.EmitScalarConversion( 3228 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 3229 } 3230 return ComplexVal; 3231 } 3232 3233 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 3234 LValue LVal, RValue RVal) { 3235 if (LVal.isGlobalReg()) { 3236 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 3237 } else { 3238 CGF.EmitAtomicStore(RVal, LVal, 3239 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3240 : llvm::AtomicOrdering::Monotonic, 3241 LVal.isVolatile(), /*IsInit=*/false); 3242 } 3243 } 3244 3245 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, 3246 QualType RValTy, SourceLocation Loc) { 3247 switch (getEvaluationKind(LVal.getType())) { 3248 case TEK_Scalar: 3249 EmitStoreThroughLValue(RValue::get(convertToScalarValue( 3250 *this, RVal, RValTy, LVal.getType(), Loc)), 3251 LVal); 3252 break; 3253 case TEK_Complex: 3254 EmitStoreOfComplex( 3255 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, 3256 /*isInit=*/false); 3257 break; 3258 case TEK_Aggregate: 3259 llvm_unreachable("Must be a scalar or complex."); 3260 } 3261 } 3262 3263 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 3264 const Expr *X, const Expr *V, 3265 SourceLocation Loc) { 3266 // v = x; 3267 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 3268 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 3269 LValue XLValue = CGF.EmitLValue(X); 3270 LValue VLValue = CGF.EmitLValue(V); 3271 RValue Res = XLValue.isGlobalReg() 3272 ? CGF.EmitLoadOfLValue(XLValue, Loc) 3273 : CGF.EmitAtomicLoad( 3274 XLValue, Loc, 3275 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3276 : llvm::AtomicOrdering::Monotonic, 3277 XLValue.isVolatile()); 3278 // OpenMP, 2.12.6, atomic Construct 3279 // Any atomic construct with a seq_cst clause forces the atomically 3280 // performed operation to include an implicit flush operation without a 3281 // list. 3282 if (IsSeqCst) 3283 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3284 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); 3285 } 3286 3287 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 3288 const Expr *X, const Expr *E, 3289 SourceLocation Loc) { 3290 // x = expr; 3291 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 3292 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); 3293 // OpenMP, 2.12.6, atomic Construct 3294 // Any atomic construct with a seq_cst clause forces the atomically 3295 // performed operation to include an implicit flush operation without a 3296 // list. 3297 if (IsSeqCst) 3298 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3299 } 3300 3301 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, 3302 RValue Update, 3303 BinaryOperatorKind BO, 3304 llvm::AtomicOrdering AO, 3305 bool IsXLHSInRHSPart) { 3306 auto &Context = CGF.CGM.getContext(); 3307 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' 3308 // expression is simple and atomic is allowed for the given type for the 3309 // target platform. 3310 if (BO == BO_Comma || !Update.isScalar() || 3311 !Update.getScalarVal()->getType()->isIntegerTy() || 3312 !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) && 3313 (Update.getScalarVal()->getType() != 3314 X.getAddress().getElementType())) || 3315 !X.getAddress().getElementType()->isIntegerTy() || 3316 !Context.getTargetInfo().hasBuiltinAtomic( 3317 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) 3318 return std::make_pair(false, RValue::get(nullptr)); 3319 3320 llvm::AtomicRMWInst::BinOp RMWOp; 3321 switch (BO) { 3322 case BO_Add: 3323 RMWOp = llvm::AtomicRMWInst::Add; 3324 break; 3325 case BO_Sub: 3326 if (!IsXLHSInRHSPart) 3327 return std::make_pair(false, RValue::get(nullptr)); 3328 RMWOp = llvm::AtomicRMWInst::Sub; 3329 break; 3330 case BO_And: 3331 RMWOp = llvm::AtomicRMWInst::And; 3332 break; 3333 case BO_Or: 3334 RMWOp = llvm::AtomicRMWInst::Or; 3335 break; 3336 case BO_Xor: 3337 RMWOp = llvm::AtomicRMWInst::Xor; 3338 break; 3339 case BO_LT: 3340 RMWOp = X.getType()->hasSignedIntegerRepresentation() 3341 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min 3342 : llvm::AtomicRMWInst::Max) 3343 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin 3344 : llvm::AtomicRMWInst::UMax); 3345 break; 3346 case BO_GT: 3347 RMWOp = X.getType()->hasSignedIntegerRepresentation() 3348 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max 3349 : llvm::AtomicRMWInst::Min) 3350 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax 3351 : llvm::AtomicRMWInst::UMin); 3352 break; 3353 case BO_Assign: 3354 RMWOp = llvm::AtomicRMWInst::Xchg; 3355 break; 3356 case BO_Mul: 3357 case BO_Div: 3358 case BO_Rem: 3359 case BO_Shl: 3360 case BO_Shr: 3361 case BO_LAnd: 3362 case BO_LOr: 3363 return std::make_pair(false, RValue::get(nullptr)); 3364 case BO_PtrMemD: 3365 case BO_PtrMemI: 3366 case BO_LE: 3367 case BO_GE: 3368 case BO_EQ: 3369 case BO_NE: 3370 case BO_AddAssign: 3371 case BO_SubAssign: 3372 case BO_AndAssign: 3373 case BO_OrAssign: 3374 case BO_XorAssign: 3375 case BO_MulAssign: 3376 case BO_DivAssign: 3377 case BO_RemAssign: 3378 case BO_ShlAssign: 3379 case BO_ShrAssign: 3380 case BO_Comma: 3381 llvm_unreachable("Unsupported atomic update operation"); 3382 } 3383 auto *UpdateVal = Update.getScalarVal(); 3384 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) { 3385 UpdateVal = CGF.Builder.CreateIntCast( 3386 IC, X.getAddress().getElementType(), 3387 X.getType()->hasSignedIntegerRepresentation()); 3388 } 3389 auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO); 3390 return std::make_pair(true, RValue::get(Res)); 3391 } 3392 3393 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( 3394 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, 3395 llvm::AtomicOrdering AO, SourceLocation Loc, 3396 const llvm::function_ref<RValue(RValue)> &CommonGen) { 3397 // Update expressions are allowed to have the following forms: 3398 // x binop= expr; -> xrval + expr; 3399 // x++, ++x -> xrval + 1; 3400 // x--, --x -> xrval - 1; 3401 // x = x binop expr; -> xrval binop expr 3402 // x = expr Op x; - > expr binop xrval; 3403 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart); 3404 if (!Res.first) { 3405 if (X.isGlobalReg()) { 3406 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop 3407 // 'xrval'. 3408 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X); 3409 } else { 3410 // Perform compare-and-swap procedure. 3411 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified()); 3412 } 3413 } 3414 return Res; 3415 } 3416 3417 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, 3418 const Expr *X, const Expr *E, 3419 const Expr *UE, bool IsXLHSInRHSPart, 3420 SourceLocation Loc) { 3421 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 3422 "Update expr in 'atomic update' must be a binary operator."); 3423 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 3424 // Update expressions are allowed to have the following forms: 3425 // x binop= expr; -> xrval + expr; 3426 // x++, ++x -> xrval + 1; 3427 // x--, --x -> xrval - 1; 3428 // x = x binop expr; -> xrval binop expr 3429 // x = expr Op x; - > expr binop xrval; 3430 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); 3431 LValue XLValue = CGF.EmitLValue(X); 3432 RValue ExprRValue = CGF.EmitAnyExpr(E); 3433 auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3434 : llvm::AtomicOrdering::Monotonic; 3435 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 3436 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 3437 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 3438 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 3439 auto Gen = 3440 [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue { 3441 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3442 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 3443 return CGF.EmitAnyExpr(UE); 3444 }; 3445 (void)CGF.EmitOMPAtomicSimpleUpdateExpr( 3446 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 3447 // OpenMP, 2.12.6, atomic Construct 3448 // Any atomic construct with a seq_cst clause forces the atomically 3449 // performed operation to include an implicit flush operation without a 3450 // list. 3451 if (IsSeqCst) 3452 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3453 } 3454 3455 static RValue convertToType(CodeGenFunction &CGF, RValue Value, 3456 QualType SourceType, QualType ResType, 3457 SourceLocation Loc) { 3458 switch (CGF.getEvaluationKind(ResType)) { 3459 case TEK_Scalar: 3460 return RValue::get( 3461 convertToScalarValue(CGF, Value, SourceType, ResType, Loc)); 3462 case TEK_Complex: { 3463 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc); 3464 return RValue::getComplex(Res.first, Res.second); 3465 } 3466 case TEK_Aggregate: 3467 break; 3468 } 3469 llvm_unreachable("Must be a scalar or complex."); 3470 } 3471 3472 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, 3473 bool IsPostfixUpdate, const Expr *V, 3474 const Expr *X, const Expr *E, 3475 const Expr *UE, bool IsXLHSInRHSPart, 3476 SourceLocation Loc) { 3477 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue"); 3478 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue"); 3479 RValue NewVVal; 3480 LValue VLValue = CGF.EmitLValue(V); 3481 LValue XLValue = CGF.EmitLValue(X); 3482 RValue ExprRValue = CGF.EmitAnyExpr(E); 3483 auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3484 : llvm::AtomicOrdering::Monotonic; 3485 QualType NewVValType; 3486 if (UE) { 3487 // 'x' is updated with some additional value. 3488 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 3489 "Update expr in 'atomic capture' must be a binary operator."); 3490 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 3491 // Update expressions are allowed to have the following forms: 3492 // x binop= expr; -> xrval + expr; 3493 // x++, ++x -> xrval + 1; 3494 // x--, --x -> xrval - 1; 3495 // x = x binop expr; -> xrval binop expr 3496 // x = expr Op x; - > expr binop xrval; 3497 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 3498 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 3499 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 3500 NewVValType = XRValExpr->getType(); 3501 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 3502 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, 3503 IsPostfixUpdate](RValue XRValue) -> RValue { 3504 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3505 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 3506 RValue Res = CGF.EmitAnyExpr(UE); 3507 NewVVal = IsPostfixUpdate ? XRValue : Res; 3508 return Res; 3509 }; 3510 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3511 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 3512 if (Res.first) { 3513 // 'atomicrmw' instruction was generated. 3514 if (IsPostfixUpdate) { 3515 // Use old value from 'atomicrmw'. 3516 NewVVal = Res.second; 3517 } else { 3518 // 'atomicrmw' does not provide new value, so evaluate it using old 3519 // value of 'x'. 3520 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3521 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 3522 NewVVal = CGF.EmitAnyExpr(UE); 3523 } 3524 } 3525 } else { 3526 // 'x' is simply rewritten with some 'expr'. 3527 NewVValType = X->getType().getNonReferenceType(); 3528 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 3529 X->getType().getNonReferenceType(), Loc); 3530 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue { 3531 NewVVal = XRValue; 3532 return ExprRValue; 3533 }; 3534 // Try to perform atomicrmw xchg, otherwise simple exchange. 3535 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3536 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 3537 Loc, Gen); 3538 if (Res.first) { 3539 // 'atomicrmw' instruction was generated. 3540 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 3541 } 3542 } 3543 // Emit post-update store to 'v' of old/new 'x' value. 3544 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); 3545 // OpenMP, 2.12.6, atomic Construct 3546 // Any atomic construct with a seq_cst clause forces the atomically 3547 // performed operation to include an implicit flush operation without a 3548 // list. 3549 if (IsSeqCst) 3550 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3551 } 3552 3553 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 3554 bool IsSeqCst, bool IsPostfixUpdate, 3555 const Expr *X, const Expr *V, const Expr *E, 3556 const Expr *UE, bool IsXLHSInRHSPart, 3557 SourceLocation Loc) { 3558 switch (Kind) { 3559 case OMPC_read: 3560 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 3561 break; 3562 case OMPC_write: 3563 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 3564 break; 3565 case OMPC_unknown: 3566 case OMPC_update: 3567 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 3568 break; 3569 case OMPC_capture: 3570 EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 3571 IsXLHSInRHSPart, Loc); 3572 break; 3573 case OMPC_if: 3574 case OMPC_final: 3575 case OMPC_num_threads: 3576 case OMPC_private: 3577 case OMPC_firstprivate: 3578 case OMPC_lastprivate: 3579 case OMPC_reduction: 3580 case OMPC_task_reduction: 3581 case OMPC_in_reduction: 3582 case OMPC_safelen: 3583 case OMPC_simdlen: 3584 case OMPC_collapse: 3585 case OMPC_default: 3586 case OMPC_seq_cst: 3587 case OMPC_shared: 3588 case OMPC_linear: 3589 case OMPC_aligned: 3590 case OMPC_copyin: 3591 case OMPC_copyprivate: 3592 case OMPC_flush: 3593 case OMPC_proc_bind: 3594 case OMPC_schedule: 3595 case OMPC_ordered: 3596 case OMPC_nowait: 3597 case OMPC_untied: 3598 case OMPC_threadprivate: 3599 case OMPC_depend: 3600 case OMPC_mergeable: 3601 case OMPC_device: 3602 case OMPC_threads: 3603 case OMPC_simd: 3604 case OMPC_map: 3605 case OMPC_num_teams: 3606 case OMPC_thread_limit: 3607 case OMPC_priority: 3608 case OMPC_grainsize: 3609 case OMPC_nogroup: 3610 case OMPC_num_tasks: 3611 case OMPC_hint: 3612 case OMPC_dist_schedule: 3613 case OMPC_defaultmap: 3614 case OMPC_uniform: 3615 case OMPC_to: 3616 case OMPC_from: 3617 case OMPC_use_device_ptr: 3618 case OMPC_is_device_ptr: 3619 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 3620 } 3621 } 3622 3623 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 3624 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); 3625 OpenMPClauseKind Kind = OMPC_unknown; 3626 for (auto *C : S.clauses()) { 3627 // Find first clause (skip seq_cst clause, if it is first). 3628 if (C->getClauseKind() != OMPC_seq_cst) { 3629 Kind = C->getClauseKind(); 3630 break; 3631 } 3632 } 3633 3634 const auto *CS = 3635 S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 3636 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) { 3637 enterFullExpression(EWC); 3638 } 3639 // Processing for statements under 'atomic capture'. 3640 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 3641 for (const auto *C : Compound->body()) { 3642 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) { 3643 enterFullExpression(EWC); 3644 } 3645 } 3646 } 3647 3648 auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, 3649 PrePostActionTy &) { 3650 CGF.EmitStopPoint(CS); 3651 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), 3652 S.getV(), S.getExpr(), S.getUpdateExpr(), 3653 S.isXLHSInRHSPart(), S.getLocStart()); 3654 }; 3655 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3656 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); 3657 } 3658 3659 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, 3660 const OMPExecutableDirective &S, 3661 const RegionCodeGenTy &CodeGen) { 3662 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); 3663 CodeGenModule &CGM = CGF.CGM; 3664 const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); 3665 3666 llvm::Function *Fn = nullptr; 3667 llvm::Constant *FnID = nullptr; 3668 3669 const Expr *IfCond = nullptr; 3670 // Check for the at most one if clause associated with the target region. 3671 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3672 if (C->getNameModifier() == OMPD_unknown || 3673 C->getNameModifier() == OMPD_target) { 3674 IfCond = C->getCondition(); 3675 break; 3676 } 3677 } 3678 3679 // Check if we have any device clause associated with the directive. 3680 const Expr *Device = nullptr; 3681 if (auto *C = S.getSingleClause<OMPDeviceClause>()) { 3682 Device = C->getDevice(); 3683 } 3684 3685 // Check if we have an if clause whose conditional always evaluates to false 3686 // or if we do not have any targets specified. If so the target region is not 3687 // an offload entry point. 3688 bool IsOffloadEntry = true; 3689 if (IfCond) { 3690 bool Val; 3691 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) 3692 IsOffloadEntry = false; 3693 } 3694 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3695 IsOffloadEntry = false; 3696 3697 assert(CGF.CurFuncDecl && "No parent declaration for target region!"); 3698 StringRef ParentName; 3699 // In case we have Ctors/Dtors we use the complete type variant to produce 3700 // the mangling of the device outlined kernel. 3701 if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) 3702 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); 3703 else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) 3704 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); 3705 else 3706 ParentName = 3707 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl))); 3708 3709 // Emit target region as a standalone region. 3710 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, 3711 IsOffloadEntry, CodeGen); 3712 OMPLexicalScope Scope(CGF, S); 3713 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3714 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 3715 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, 3716 CapturedVars); 3717 } 3718 3719 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, 3720 PrePostActionTy &Action) { 3721 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 3722 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3723 CGF.EmitOMPPrivateClause(S, PrivateScope); 3724 (void)PrivateScope.Privatize(); 3725 3726 Action.Enter(CGF); 3727 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3728 } 3729 3730 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, 3731 StringRef ParentName, 3732 const OMPTargetDirective &S) { 3733 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3734 emitTargetRegion(CGF, S, Action); 3735 }; 3736 llvm::Function *Fn; 3737 llvm::Constant *Addr; 3738 // Emit target region as a standalone region. 3739 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3740 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3741 assert(Fn && Addr && "Target device function emission failed."); 3742 } 3743 3744 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 3745 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3746 emitTargetRegion(CGF, S, Action); 3747 }; 3748 emitCommonOMPTargetDirective(*this, S, CodeGen); 3749 } 3750 3751 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, 3752 const OMPExecutableDirective &S, 3753 OpenMPDirectiveKind InnermostKind, 3754 const RegionCodeGenTy &CodeGen) { 3755 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); 3756 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( 3757 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 3758 3759 const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>(); 3760 const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>(); 3761 if (NT || TL) { 3762 Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; 3763 Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; 3764 3765 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, 3766 S.getLocStart()); 3767 } 3768 3769 OMPTeamsScope Scope(CGF, S); 3770 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3771 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3772 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, 3773 CapturedVars); 3774 } 3775 3776 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { 3777 // Emit teams region as a standalone region. 3778 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3779 OMPPrivateScope PrivateScope(CGF); 3780 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3781 CGF.EmitOMPPrivateClause(S, PrivateScope); 3782 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 3783 (void)PrivateScope.Privatize(); 3784 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3785 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 3786 }; 3787 emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); 3788 emitPostUpdateForReductionClause( 3789 *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 3790 } 3791 3792 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, 3793 const OMPTargetTeamsDirective &S) { 3794 auto *CS = S.getCapturedStmt(OMPD_teams); 3795 Action.Enter(CGF); 3796 auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 3797 // TODO: Add support for clauses. 3798 CGF.EmitStmt(CS->getCapturedStmt()); 3799 }; 3800 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); 3801 } 3802 3803 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 3804 CodeGenModule &CGM, StringRef ParentName, 3805 const OMPTargetTeamsDirective &S) { 3806 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3807 emitTargetTeamsRegion(CGF, Action, S); 3808 }; 3809 llvm::Function *Fn; 3810 llvm::Constant *Addr; 3811 // Emit target region as a standalone region. 3812 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3813 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3814 assert(Fn && Addr && "Target device function emission failed."); 3815 } 3816 3817 void CodeGenFunction::EmitOMPTargetTeamsDirective( 3818 const OMPTargetTeamsDirective &S) { 3819 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3820 emitTargetTeamsRegion(CGF, Action, S); 3821 }; 3822 emitCommonOMPTargetDirective(*this, S, CodeGen); 3823 } 3824 3825 void CodeGenFunction::EmitOMPCancellationPointDirective( 3826 const OMPCancellationPointDirective &S) { 3827 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), 3828 S.getCancelRegion()); 3829 } 3830 3831 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 3832 const Expr *IfCond = nullptr; 3833 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3834 if (C->getNameModifier() == OMPD_unknown || 3835 C->getNameModifier() == OMPD_cancel) { 3836 IfCond = C->getCondition(); 3837 break; 3838 } 3839 } 3840 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, 3841 S.getCancelRegion()); 3842 } 3843 3844 CodeGenFunction::JumpDest 3845 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 3846 if (Kind == OMPD_parallel || Kind == OMPD_task || 3847 Kind == OMPD_target_parallel) 3848 return ReturnBlock; 3849 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 3850 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || 3851 Kind == OMPD_distribute_parallel_for || 3852 Kind == OMPD_target_parallel_for); 3853 return OMPCancelStack.getExitBlock(); 3854 } 3855 3856 void CodeGenFunction::EmitOMPUseDevicePtrClause( 3857 const OMPClause &NC, OMPPrivateScope &PrivateScope, 3858 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { 3859 const auto &C = cast<OMPUseDevicePtrClause>(NC); 3860 auto OrigVarIt = C.varlist_begin(); 3861 auto InitIt = C.inits().begin(); 3862 for (auto PvtVarIt : C.private_copies()) { 3863 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl()); 3864 auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl()); 3865 auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl()); 3866 3867 // In order to identify the right initializer we need to match the 3868 // declaration used by the mapping logic. In some cases we may get 3869 // OMPCapturedExprDecl that refers to the original declaration. 3870 const ValueDecl *MatchingVD = OrigVD; 3871 if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) { 3872 // OMPCapturedExprDecl are used to privative fields of the current 3873 // structure. 3874 auto *ME = cast<MemberExpr>(OED->getInit()); 3875 assert(isa<CXXThisExpr>(ME->getBase()) && 3876 "Base should be the current struct!"); 3877 MatchingVD = ME->getMemberDecl(); 3878 } 3879 3880 // If we don't have information about the current list item, move on to 3881 // the next one. 3882 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD); 3883 if (InitAddrIt == CaptureDeviceAddrMap.end()) 3884 continue; 3885 3886 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 3887 // Initialize the temporary initialization variable with the address we 3888 // get from the runtime library. We have to cast the source address 3889 // because it is always a void *. References are materialized in the 3890 // privatization scope, so the initialization here disregards the fact 3891 // the original variable is a reference. 3892 QualType AddrQTy = 3893 getContext().getPointerType(OrigVD->getType().getNonReferenceType()); 3894 llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy); 3895 Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy); 3896 setAddrOfLocalVar(InitVD, InitAddr); 3897 3898 // Emit private declaration, it will be initialized by the value we 3899 // declaration we just added to the local declarations map. 3900 EmitDecl(*PvtVD); 3901 3902 // The initialization variables reached its purpose in the emission 3903 // ofthe previous declaration, so we don't need it anymore. 3904 LocalDeclMap.erase(InitVD); 3905 3906 // Return the address of the private variable. 3907 return GetAddrOfLocalVar(PvtVD); 3908 }); 3909 assert(IsRegistered && "firstprivate var already registered as private"); 3910 // Silence the warning about unused variable. 3911 (void)IsRegistered; 3912 3913 ++OrigVarIt; 3914 ++InitIt; 3915 } 3916 } 3917 3918 // Generate the instructions for '#pragma omp target data' directive. 3919 void CodeGenFunction::EmitOMPTargetDataDirective( 3920 const OMPTargetDataDirective &S) { 3921 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true); 3922 3923 // Create a pre/post action to signal the privatization of the device pointer. 3924 // This action can be replaced by the OpenMP runtime code generation to 3925 // deactivate privatization. 3926 bool PrivatizeDevicePointers = false; 3927 class DevicePointerPrivActionTy : public PrePostActionTy { 3928 bool &PrivatizeDevicePointers; 3929 3930 public: 3931 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers) 3932 : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {} 3933 void Enter(CodeGenFunction &CGF) override { 3934 PrivatizeDevicePointers = true; 3935 } 3936 }; 3937 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers); 3938 3939 auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers]( 3940 CodeGenFunction &CGF, PrePostActionTy &Action) { 3941 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3942 CGF.EmitStmt( 3943 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3944 }; 3945 3946 // Codegen that selects wheather to generate the privatization code or not. 3947 auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers, 3948 &InnermostCodeGen](CodeGenFunction &CGF, 3949 PrePostActionTy &Action) { 3950 RegionCodeGenTy RCG(InnermostCodeGen); 3951 PrivatizeDevicePointers = false; 3952 3953 // Call the pre-action to change the status of PrivatizeDevicePointers if 3954 // needed. 3955 Action.Enter(CGF); 3956 3957 if (PrivatizeDevicePointers) { 3958 OMPPrivateScope PrivateScope(CGF); 3959 // Emit all instances of the use_device_ptr clause. 3960 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) 3961 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope, 3962 Info.CaptureDeviceAddrMap); 3963 (void)PrivateScope.Privatize(); 3964 RCG(CGF); 3965 } else 3966 RCG(CGF); 3967 }; 3968 3969 // Forward the provided action to the privatization codegen. 3970 RegionCodeGenTy PrivRCG(PrivCodeGen); 3971 PrivRCG.setAction(Action); 3972 3973 // Notwithstanding the body of the region is emitted as inlined directive, 3974 // we don't use an inline scope as changes in the references inside the 3975 // region are expected to be visible outside, so we do not privative them. 3976 OMPLexicalScope Scope(CGF, S); 3977 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, 3978 PrivRCG); 3979 }; 3980 3981 RegionCodeGenTy RCG(CodeGen); 3982 3983 // If we don't have target devices, don't bother emitting the data mapping 3984 // code. 3985 if (CGM.getLangOpts().OMPTargetTriples.empty()) { 3986 RCG(*this); 3987 return; 3988 } 3989 3990 // Check if we have any if clause associated with the directive. 3991 const Expr *IfCond = nullptr; 3992 if (auto *C = S.getSingleClause<OMPIfClause>()) 3993 IfCond = C->getCondition(); 3994 3995 // Check if we have any device clause associated with the directive. 3996 const Expr *Device = nullptr; 3997 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 3998 Device = C->getDevice(); 3999 4000 // Set the action to signal privatization of device pointers. 4001 RCG.setAction(PrivAction); 4002 4003 // Emit region code. 4004 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG, 4005 Info); 4006 } 4007 4008 void CodeGenFunction::EmitOMPTargetEnterDataDirective( 4009 const OMPTargetEnterDataDirective &S) { 4010 // If we don't have target devices, don't bother emitting the data mapping 4011 // code. 4012 if (CGM.getLangOpts().OMPTargetTriples.empty()) 4013 return; 4014 4015 // Check if we have any if clause associated with the directive. 4016 const Expr *IfCond = nullptr; 4017 if (auto *C = S.getSingleClause<OMPIfClause>()) 4018 IfCond = C->getCondition(); 4019 4020 // Check if we have any device clause associated with the directive. 4021 const Expr *Device = nullptr; 4022 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 4023 Device = C->getDevice(); 4024 4025 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 4026 } 4027 4028 void CodeGenFunction::EmitOMPTargetExitDataDirective( 4029 const OMPTargetExitDataDirective &S) { 4030 // If we don't have target devices, don't bother emitting the data mapping 4031 // code. 4032 if (CGM.getLangOpts().OMPTargetTriples.empty()) 4033 return; 4034 4035 // Check if we have any if clause associated with the directive. 4036 const Expr *IfCond = nullptr; 4037 if (auto *C = S.getSingleClause<OMPIfClause>()) 4038 IfCond = C->getCondition(); 4039 4040 // Check if we have any device clause associated with the directive. 4041 const Expr *Device = nullptr; 4042 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 4043 Device = C->getDevice(); 4044 4045 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 4046 } 4047 4048 static void emitTargetParallelRegion(CodeGenFunction &CGF, 4049 const OMPTargetParallelDirective &S, 4050 PrePostActionTy &Action) { 4051 // Get the captured statement associated with the 'parallel' region. 4052 auto *CS = S.getCapturedStmt(OMPD_parallel); 4053 Action.Enter(CGF); 4054 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) { 4055 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4056 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4057 CGF.EmitOMPPrivateClause(S, PrivateScope); 4058 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4059 (void)PrivateScope.Privatize(); 4060 // TODO: Add support for clauses. 4061 CGF.EmitStmt(CS->getCapturedStmt()); 4062 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 4063 }; 4064 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, 4065 emitEmptyBoundParameters); 4066 emitPostUpdateForReductionClause( 4067 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 4068 } 4069 4070 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 4071 CodeGenModule &CGM, StringRef ParentName, 4072 const OMPTargetParallelDirective &S) { 4073 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4074 emitTargetParallelRegion(CGF, S, Action); 4075 }; 4076 llvm::Function *Fn; 4077 llvm::Constant *Addr; 4078 // Emit target region as a standalone region. 4079 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4080 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4081 assert(Fn && Addr && "Target device function emission failed."); 4082 } 4083 4084 void CodeGenFunction::EmitOMPTargetParallelDirective( 4085 const OMPTargetParallelDirective &S) { 4086 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4087 emitTargetParallelRegion(CGF, S, Action); 4088 }; 4089 emitCommonOMPTargetDirective(*this, S, CodeGen); 4090 } 4091 4092 void CodeGenFunction::EmitOMPTargetParallelForDirective( 4093 const OMPTargetParallelForDirective &S) { 4094 // TODO: codegen for target parallel for. 4095 } 4096 4097 /// Emit a helper variable and return corresponding lvalue. 4098 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, 4099 const ImplicitParamDecl *PVD, 4100 CodeGenFunction::OMPPrivateScope &Privates) { 4101 auto *VDecl = cast<VarDecl>(Helper->getDecl()); 4102 Privates.addPrivate( 4103 VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); }); 4104 } 4105 4106 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { 4107 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); 4108 // Emit outlined function for task construct. 4109 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 4110 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 4111 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 4112 const Expr *IfCond = nullptr; 4113 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 4114 if (C->getNameModifier() == OMPD_unknown || 4115 C->getNameModifier() == OMPD_taskloop) { 4116 IfCond = C->getCondition(); 4117 break; 4118 } 4119 } 4120 4121 OMPTaskDataTy Data; 4122 // Check if taskloop must be emitted without taskgroup. 4123 Data.Nogroup = S.getSingleClause<OMPNogroupClause>(); 4124 // TODO: Check if we should emit tied or untied task. 4125 Data.Tied = true; 4126 // Set scheduling for taskloop 4127 if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) { 4128 // grainsize clause 4129 Data.Schedule.setInt(/*IntVal=*/false); 4130 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize())); 4131 } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) { 4132 // num_tasks clause 4133 Data.Schedule.setInt(/*IntVal=*/true); 4134 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks())); 4135 } 4136 4137 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { 4138 // if (PreCond) { 4139 // for (IV in 0..LastIteration) BODY; 4140 // <Final counter/linear vars updates>; 4141 // } 4142 // 4143 4144 // Emit: if (PreCond) - begin. 4145 // If the condition constant folds and can be elided, avoid emitting the 4146 // whole loop. 4147 bool CondConstant; 4148 llvm::BasicBlock *ContBlock = nullptr; 4149 OMPLoopScope PreInitScope(CGF, S); 4150 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 4151 if (!CondConstant) 4152 return; 4153 } else { 4154 auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); 4155 ContBlock = CGF.createBasicBlock("taskloop.if.end"); 4156 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 4157 CGF.getProfileCount(&S)); 4158 CGF.EmitBlock(ThenBlock); 4159 CGF.incrementProfileCounter(&S); 4160 } 4161 4162 if (isOpenMPSimdDirective(S.getDirectiveKind())) 4163 CGF.EmitOMPSimdInit(S); 4164 4165 OMPPrivateScope LoopScope(CGF); 4166 // Emit helper vars inits. 4167 enum { LowerBound = 5, UpperBound, Stride, LastIter }; 4168 auto *I = CS->getCapturedDecl()->param_begin(); 4169 auto *LBP = std::next(I, LowerBound); 4170 auto *UBP = std::next(I, UpperBound); 4171 auto *STP = std::next(I, Stride); 4172 auto *LIP = std::next(I, LastIter); 4173 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP, 4174 LoopScope); 4175 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP, 4176 LoopScope); 4177 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope); 4178 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, 4179 LoopScope); 4180 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 4181 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 4182 (void)LoopScope.Privatize(); 4183 // Emit the loop iteration variable. 4184 const Expr *IVExpr = S.getIterationVariable(); 4185 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 4186 CGF.EmitVarDecl(*IVDecl); 4187 CGF.EmitIgnoredExpr(S.getInit()); 4188 4189 // Emit the iterations count variable. 4190 // If it is not a variable, Sema decided to calculate iterations count on 4191 // each iteration (e.g., it is foldable into a constant). 4192 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 4193 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 4194 // Emit calculation of the iterations count. 4195 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 4196 } 4197 4198 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 4199 S.getInc(), 4200 [&S](CodeGenFunction &CGF) { 4201 CGF.EmitOMPLoopBody(S, JumpDest()); 4202 CGF.EmitStopPoint(&S); 4203 }, 4204 [](CodeGenFunction &) {}); 4205 // Emit: if (PreCond) - end. 4206 if (ContBlock) { 4207 CGF.EmitBranch(ContBlock); 4208 CGF.EmitBlock(ContBlock, true); 4209 } 4210 // Emit final copy of the lastprivate variables if IsLastIter != 0. 4211 if (HasLastprivateClause) { 4212 CGF.EmitOMPLastprivateClauseFinal( 4213 S, isOpenMPSimdDirective(S.getDirectiveKind()), 4214 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( 4215 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 4216 (*LIP)->getType(), S.getLocStart()))); 4217 } 4218 }; 4219 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 4220 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 4221 const OMPTaskDataTy &Data) { 4222 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) { 4223 OMPLoopScope PreInitScope(CGF, S); 4224 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S, 4225 OutlinedFn, SharedsTy, 4226 CapturedStruct, IfCond, Data); 4227 }; 4228 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, 4229 CodeGen); 4230 }; 4231 if (Data.Nogroup) 4232 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 4233 else { 4234 CGM.getOpenMPRuntime().emitTaskgroupRegion( 4235 *this, 4236 [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, 4237 PrePostActionTy &Action) { 4238 Action.Enter(CGF); 4239 CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 4240 }, 4241 S.getLocStart()); 4242 } 4243 } 4244 4245 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 4246 EmitOMPTaskLoopBasedDirective(S); 4247 } 4248 4249 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 4250 const OMPTaskLoopSimdDirective &S) { 4251 EmitOMPTaskLoopBasedDirective(S); 4252 } 4253 4254 // Generate the instructions for '#pragma omp target update' directive. 4255 void CodeGenFunction::EmitOMPTargetUpdateDirective( 4256 const OMPTargetUpdateDirective &S) { 4257 // If we don't have target devices, don't bother emitting the data mapping 4258 // code. 4259 if (CGM.getLangOpts().OMPTargetTriples.empty()) 4260 return; 4261 4262 // Check if we have any if clause associated with the directive. 4263 const Expr *IfCond = nullptr; 4264 if (auto *C = S.getSingleClause<OMPIfClause>()) 4265 IfCond = C->getCondition(); 4266 4267 // Check if we have any device clause associated with the directive. 4268 const Expr *Device = nullptr; 4269 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 4270 Device = C->getDevice(); 4271 4272 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 4273 } 4274