//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct code
/// generation for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                  bool AsInlined = false, bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (AsInlined) {
      if (S.hasAssociatedStmt()) {
        auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
        for (auto &C : CS->captures()) {
          if (C.capturesVariable() || C.capturesVariableByCopy()) {
            auto *VD = C.getCapturedVar();
            assert(VD == VD->getCanonicalDecl() &&
                   "Canonical decl must be captured.");
            DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                            isCapturedVar(CGF, VD) ||
                                (CGF.CapturedStmtInfo &&
                                 InlinedShareds.isGlobalVarCaptured(VD)),
                            VD->getType().getNonReferenceType(), VK_LValue,
                            SourceLocation());
            InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
              return CGF.EmitLValue(&DRE).getAddress();
            });
          }
        }
        (void)InlinedShareds.Privatize();
      }
    }
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct code
/// generation for captured expressions.
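/// Pre-init statements for combined target and loop-bound-sharing forms are
/// emitted elsewhere, not by this scope (see EmitPreInitStmt below).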
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct code
/// generation for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of expressions used in the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    if (auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

} // namespace

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV =
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
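      // The temporary below is typed as uintptr: the value is stored through
      // a pointer of its original type and the same bytes are reloaded as
      // uintptr, so any by-copy capture fits the runtime's pointer-sized slot.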
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), SourceLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), SourceLocation());
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references we need to return the address of the
  // reference instead of the reference to the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
  }

  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType()) {
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  }
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. VLA type sizes are passed to the outlined
    // function in the same way.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType()) {
      if (FO.UIntPtrCastRequired)
        ArgType = Ctx.getUIntPtrType();
    }

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &Ctx.Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType());
    auto *Arg =
        ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II,
                                  ArgType, ImplicitParamDecl::Other);
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit the function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();

  // Generate the function.
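  // StartFunction emits the standard prologue; the loop below then revisits
  // each captured field and records how its argument is accessed in the body:
  // by-copy pointers are used as-is, VLA size arguments are decoded into
  // VLASizes, 'this' is loaded into CXXThisValue, and everything else is
  // mapped into LocalAddrs (undoing the uintptr casts when required).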
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.S->getLocStart(), CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    // Do not map arguments if we emit the function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CGF.CreateMemTemp(
            CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
        CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
                              /*Volatile=*/false, CurVD->getType());
        LocalAddr = RefAddr;
      }
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
    LValue ArgLVal =
        CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), BaseInfo);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
                                                          Args[Cnt]->getName(),
                                                          ArgLVal),
                                     FD->getType(), BaseInfo);
      }
      auto *ExprArg =
          CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = CGF.EmitLoadOfReference(
              ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
        } else if (!VarTy->isVariablyModifiedType() ||
                   !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = CGF.EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      LocalAddrs.insert(
          {Args[Cnt],
           {Var, FO.UIntPtrCastRequired
                     ? castValueFromUintptr(CGF, FD->getType(),
                                            Args[Cnt]->getName(), ArgLVal,
                                            VarTy->isReferenceType())
                     : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
                         .getScalarVal();
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() &&
      CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
      LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
      VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str());
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      setAddrOfLocalVar(LocalAddrPair.second.first,
                        LocalAddrPair.second.second);
    }
  }
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName());
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV =
          WrapperCGF.MakeAddrLValue(I->second.second, Arg->getType(), BaseInfo);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end())
        CallArg = EI->second.second;
      else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(), BaseInfo);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
      }
    }
    CallArgs.emplace_back(CallArg);
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
                                                  F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
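  // The code below hand-rolls the copy loop:
  //
  //   if (DestBegin == DestEnd) goto done;
  // body:
  //   <PHIs for the current src/dest elements>
  //   CopyGen(dest, src); advance both pointers by one element;
  //   branch back to body unless dest has reached DestEnd;
  // done: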
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
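    // Privatizing SrcVD and DestVD makes the Copy expression, which refers to
    // these pseudo variables, read from SrcAddr and write to DestAddr.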
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *CapFD = CapturesInfo.lookup(OrigVD);
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If it
          // is, there is no need to copy data.
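          // Only on the master thread does the threadprivate copy alias the
          // master variable itself, so a pointer comparison detects it.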
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    // Taskloops do not require additional initialization; it is done in the
    // runtime support library.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable of a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
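        // For privates of reference type the data lives behind the reference,
        // so load the reference before copying.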
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const auto *IRef : Shareds) {
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    auto Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    if (isa<OMPArraySectionExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
        return GetAddrOfLocalVar(PrivateVD);
      });
    } else if (isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(
          LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      D.getDirectiveKind() == OMPD_simd;
    bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
    // Emit nowait reduction if the nowait clause is present or the directive
    // is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (auto *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit a conditional
          // block if it was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace

static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // when propagating the master thread's values of threadprivate variables
      // to the local instances of those variables in all other implicit
      // threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counter values on the current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *U : C->updates())
      EmitIgnoredExpr(U);
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
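  // A linear variable whose initializer is a plain DeclRefExpr is initialized
  // from the captured original variable; any other initializer is emitted as
  // a regular declaration.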
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      HasLinears = true;
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}

void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit a conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    (void)LoopScope.addPrivate(VD, [&]() -> Address {
      // Emit var without initialization.
      if (!LocalDeclMap.count(PrivateVD)) {
        auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
        EmitAutoVarCleanups(VarEmission);
      }
      DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      (*I)->getType(), VK_LValue, (*I)->getExprLoc());
      return EmitLValue(&DRE).getAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    }
    ++I;
  }
}

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (auto I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Check that the loop is executed at least one time.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else
        EmitVarDecl(*PrivateVD);
      ++CurPrivate;
    }
  }
}

static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(false);
  }
}

void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}

void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit a conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED)
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      else {
        DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPLoopScope PreInitScope(CGF, S);
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    (void)CGF.EmitOMPLinearClauseInit(S);
    {
      OMPPrivateScope LoopScope(CGF);
      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
      CGF.EmitOMPLinearClause(S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      bool HasLastprivateClause =
          CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      CGF.EmitOMPSimdFinal(
          S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
      // Emit final copy of the lastprivate variables at the end of loops.
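      // E.g. for '#pragma omp simd lastprivate(x)', x from the logically
      // last iteration is copied back to the original variable here; the
      // final counter values were already emitted by EmitOMPSimdFinal
      // above, hence /*NoFinals=*/true below.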
      if (HasLastprivateClause)
        CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
      CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
      emitPostUpdateForReductionClause(
          CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    }
    CGF.EmitOMPLinearClauseFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  auto &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for').
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    BoolCondVal =
        RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
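  // E.g. for '#pragma omp for schedule(guided)' (dynamic scheduling, no
  // 'ordered' clause) IsMonotonic is false and the loop is marked parallel;
  // a 'monotonic' schedule modifier suppresses this.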
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();

  // when 'distribute' is not combined with a 'for':
  // while (idx <= UB) { BODY; ++idx; }
  // when 'distribute' is combined with a 'for'
  // (e.g. 'distribute parallel for')
  // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
  EmitOMPInnerLoop(
      S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
      [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
        CodeGenLoop(CGF, S, LoopExit);
      },
      [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
        CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}

void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered =
      Ordered || RT.isDynamic(ScheduleKind.Schedule);

  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the threads
  // in the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
                                                              LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DispatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);
  }

  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}

static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}

void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {

  auto &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as EmitOMPForOuterLoop, except that the schedule cannot be
  // dynamic.
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);

  // For combined 'distribute' and 'for' directives, the increment expression
  // of the 'distribute' loop is stored in DistInc. For 'distribute' alone, it
  // is in Inc.
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();

  // This routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive.
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}

/// Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into the current ones.
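  // In pseudocode, with conversions to the iteration-variable type:
  //   LB = (IVType)PrevLB; UB = (IVType)PrevUB;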
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(), SourceLocation());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(), SourceLocation());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}

/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided), we need
/// to use the LB and UB expressions generated by the worksharing code
/// generation support; in non-combined situations we would just emit 0 and
/// the LastIteration expression.
/// This function is necessary due to the difference in the LB and UB
/// types for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // When implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
  // is not normalized as each team only executes its own assigned
  // distribute chunk.
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
                                            SourceLocation());
  llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
                                            SourceLocation());
  return {LBVal, UBVal};
}

static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  auto LBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));

  auto UBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}

static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &) {
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S, OMPD_for, CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}

void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for,
                                  /*HasCancel=*/false);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
                                              /*HasCancel=*/false);
}

void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_distribute_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_distribute_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_teams_distribute,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_teams_distribute_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_teams_distribute_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}
void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_teams_distribute_parallel_for,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams_distribute,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams_distribute_parallel_for,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams_distribute_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams_distribute_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace

bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    bool Ordered = false;
    if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S);
      else
        Ordered = true;
    }

    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      if (RT.isStaticNonchunked(ScheduleKind.Schedule,
                                /* Chunked */ Chunk != nullptr) &&
          !Ordered) {
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided
        // into chunks that are approximately equal in size, and at most one
        // chunk is distributed to each thread. Note that the size of the
        // chunks is unspecified in this case.
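        // For a 32-bit IV this roughly lowers to (a sketch; 34 is the
        // kmp_sch_static schedule constant in the OpenMP runtime):
        //   __kmpc_for_static_init_4(&loc, tid, 34, &IL, &LB, &UB, &ST,
        //                            /*incr=*/1, /*chunk=*/1);
        //   for (IV = LB; IV <= min(UB, GlobalUB); ++IV) BODY;
        //   __kmpc_for_static_fini(&loc, tid);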
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
            UB.getAddress(), ST.getAddress());
        RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
                             ScheduleKind, StaticInit);
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        const bool IsMonotonic =
            Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
            ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
            ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
                                             ST.getAddress(), IL.getAddress(),
                                             Chunk, EUB);
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S,
                         [&](CodeGenFunction &CGF) -> llvm::Value * {
                           return CGF.Builder.CreateIsNotNull(
                               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
                         });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getLocStart()));
    });
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
  return HasLastprivateClause;
}

/// The following two functions generate expressions for the loop lower
/// and upper bounds in case of static and dynamic (dispatch) schedule
/// of the associated 'for' or 'distribute' loop.
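/// For a static schedule the bounds are the LB and UB helper variables of
/// the directive; for a dispatch schedule they are the constants 0 and
/// LastIteration (see emitDispatchForLoopBounds).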
static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  return {LB, UB};
}

/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
/// consider the lower and upper bound expressions generated by the
/// worksharing loop support, but we use 0 and the iteration space size as
/// constants.
static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                          Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
  return {LBVal, UBVal};
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
  return LVal;
}

void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
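    // 'sections' is lowered as a statically scheduled worksharing loop over
    // the section indices, roughly:
    //   for (IV = 0; IV <= min(UB, NumSections - 1); ++IV) switch (IV) ...
    // The helper variables below mirror the loop-control variables of that
    // loop.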
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of
      // lastprivate variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
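    // Each thread gets at most one contiguous chunk of section indices
    // (e.g. with two threads and three sections, one thread may run
    // sections 0-1 and the other section 2); the exact split is up to the
    // runtime.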
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // post-update of the lastprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_sections);
  }
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
                                              S.hasCancel());
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination>=<source> expressions).
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for 'single' region along with 'copyprivate' clauses.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid data race on firstprivate
  // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getLocStart(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
}
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  Expr *Hint = nullptr;
  if (auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getLocStart(), Hint);
}

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitSections(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
                                                const RegionCodeGenTy &BodyGen,
                                                const TaskGenTy &TaskGen,
                                                OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  auto *TaskT = std::next(I, 4);
  // Check if the task is final.
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
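    // E.g. '#pragma omp task final(1)' sets the final flag at compile time
    // and no runtime test is emitted; a non-constant condition is evaluated
    // into Data.Final's pointer below.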
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    auto *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Data.ReductionVars.emplace_back(Ref);
      Data.ReductionCopies.emplace_back(*IPriv);
      Data.ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getLocStart(), LHSs, RHSs, Data);
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (auto *IRef : C->varlists())
      Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      auto *CopyFn = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(CopyFnParam)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
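      // The copy function is invoked with one out-pointer per private
      // variable below; it fills in the addresses of the task's private
      // copies, which are then registered in the private scope.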
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (auto *E : Data.PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.LastprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
                                                          CopyFn, CallArgs);
      for (auto &&Pair : LastprivateDstsOrigs) {
        auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    if (Data.Reductions) {
      OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
                             Data.ReductionOps);
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement =
            Address(CGF.EmitScalarConversion(
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                        CGF.getContext().getPointerType(
                            Data.ReductionCopies[Cnt]->getType()),
                        SourceLocation()),
                    Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                         [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
                                                           RedCG, Cnt);
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const auto *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicitly firstprivate
        // and has already been privatized during processing of the
        // firstprivates.
        llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
            CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                SourceLocation()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
                              [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
                                                           RedCG, Cnt);
      }
    }
    (void)InRedScope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
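  // For illustration: tasks are tied by default, and '#pragma omp task
  // untied' clears Data.Tied, which is forwarded to the runtime when the
  // task is created.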
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        auto IPriv = C->privates().begin();
        auto IRed = C->reduction_ops().begin();
        auto ILHS = C->lhs_exprs().begin();
        auto IRHS = C->rhs_exprs().begin();
        for (const auto *Ref : C->varlists()) {
          Data.ReductionVars.emplace_back(Ref);
          Data.ReductionCopies.emplace_back(*IPriv);
          Data.ReductionOps.emplace_back(*IRed);
          LHSs.emplace_back(*ILHS);
          RHSs.emplace_back(*IRHS);
          std::advance(IPriv, 1);
          std::advance(IRed, 1);
          std::advance(ILHS, 1);
          std::advance(IRHS, 1);
        }
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(
              CGF, S.getLocStart(), LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
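  // For illustration: in 'for (int i = 0; i < 128; ++i)' the iterations
  // count folds to the constant 128 and no helper variable is needed, while
  // 'for (int i = 0; i < n; i += step)' requires the helper variable that is
  // initialized here via CalcLastIteration.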
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
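      // For illustration: plain '#pragma omp distribute' (or
      // 'dist_schedule(static)' without a chunk) takes the unchunked branch
      // below, where each team receives at most one contiguous chunk, while
      // '#pragma omp distribute dist_schedule(static, 4)' goes through the
      // chunked outer loop, which hands out chunks of four iterations in a
      // round-robin fashion.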
      if (RT.isStaticNonchunked(ScheduleKind,
                                /*Chunked=*/Chunk != nullptr)) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /*Ordered=*/false, IL.getAddress(),
            LB.getAddress(), UB.getAddress(), ST.getAddress());
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                                    StaticInit);
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                         ? S.getCombinedCond()
                         : S.getCond();

        // For 'distribute' alone, codegen
        //   while (idx <= UB) { BODY; ++idx; }
        // When combined with 'for' (e.g. as in 'distribute parallel for'),
        //   while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
                         [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                           CodeGenLoop(CGF, S, LoopExit);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // the runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }

      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}

void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
                                              /*HasCancel=*/false);
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
  Fn->addFnAttr(llvm::Attribute::NoInline);
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (!S.getAssociatedStmt()) {
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    if (C) {
      auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
}

static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
                                        Loc)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
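    // E.g., converting a scalar double 'd' to '_Complex double' yields the
    // pair (d, 0.0): the converted scalar becomes the real part and the
    // imaginary part is zero-initialized.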
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                              DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  } else {
    CGF.EmitAtomicStore(RVal, LVal,
                        IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                 : llvm::AtomicOrdering::Monotonic,
                        LVal.isVolatile(), /*IsInit=*/false);
  }
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(
                         XLValue, Loc,
                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                  : llvm::AtomicOrdering::Monotonic,
                         XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
}

static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
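  // For illustration: '#pragma omp atomic write seq_cst' therefore behaves
  // as if an unnamed '#pragma omp flush' immediately followed the atomic
  // store.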
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomics are supported for the given
  // type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval binop expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr;
    // x = expr Op x; -> expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide the new value, so evaluate it using
        // the old value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find the first clause (skip the seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
    enterFullExpression(EWC);
  }
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const auto *C : Compound->body()) {
      if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
        enterFullExpression(EWC);
      }
    }
  }

  auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.EmitStopPoint(CS);
    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
                      S.getV(), S.getExpr(), S.getUpdateExpr(),
                      S.isXLHSInRHSPart(), S.getLocStart());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;
  const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());

  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the at most one 'if' clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
    Device = C->getDevice();
  }

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
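  // For illustration: given '#pragma omp target if(n > 1) device(dev)', the
  // IfCond and Device expressions collected above are forwarded to the
  // runtime call below; a constant-false 'if' (or an empty -fopenmp-targets
  // list) means the region can never be offloaded, so it is not registered
  // as an offload entry.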
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        CapturedVars);
}

static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();

  Action.Enter(CGF);
  CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
  const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getLocStart());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
                                           CapturedVars);
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
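  // For illustration: for '#pragma omp teams firstprivate(a) reduction(+:s)'
  // the lambda below privatizes 'a' and 's' before emitting the body and
  // combines the per-team reduction results afterwards.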
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

static void emitTargetTeamsRegion(CodeGenFunction &CGF,
                                  PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for);
  return OMPCancelStack.getExitBlock();
}

void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (auto PvtVarIt : C.private_copies()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // an OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
      // Initialize the temporary initialization variable with the address
      // we get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // that the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it will be initialized by the
      // declaration we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable has served its purpose in the emission
      // of the previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}

// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code
  // generation to deactivate privatization.
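  // For illustration: for '#pragma omp target data map(a[:n]) use_device_ptr(a)',
  // the pre-action below records that 'a' must be replaced inside the region
  // by the device address the runtime reports in Info.CaptureDeviceAddrMap.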
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers
      // if needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else
        RCG(CGF);
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Even though the body of the region is emitted as an inlined directive,
    // we don't use an inline scope, as changes to the references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
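  // For illustration: in '#pragma omp target enter data map(to: v[:n])
  // device(dev)', 'dev' is the device clause expression extracted below.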
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  auto *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(
      CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  // TODO: codegen for target parallel for.
}

/// Map an OpenMP loop helper variable to the captured-region parameter that
/// backs it.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(
      VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
}

void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup)
    EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
  else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
        },
        S.getLocStart());
  }
}

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
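// For illustration: '#pragma omp target update from(v[:n]) if(cond) device(d)'
// reaches the standalone runtime call above with IfCond and Device filled in,
// while the motion clauses themselves are handled inside
// emitTargetDataStandAloneCall.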