//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                  bool AsInlined = false, bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (AsInlined) {
      if (S.hasAssociatedStmt()) {
        auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
        for (auto &C : CS->captures()) {
          if (C.capturesVariable() || C.capturesVariableByCopy()) {
            auto *VD = C.getCapturedVar();
            assert(VD == VD->getCanonicalDecl() &&
                   "Canonical decl must be captured.");
            DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                            isCapturedVar(CGF, VD) ||
                                (CGF.CapturedStmtInfo &&
                                 InlinedShareds.isGlobalVarCaptured(VD)),
                            VD->getType().getNonReferenceType(), VK_LValue,
                            SourceLocation());
            InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
              return CGF.EmitLValue(&DRE).getAddress();
            });
          }
        }
        (void)InlinedShareds.Privatize();
      }
    }
  }
};

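// Note: the pre-init statements handled by these scopes materialize clause
// expressions that Sema pre-captured into helper temporaries
// (OMPClauseWithPreInit). Illustrative example: for
//   #pragma omp parallel num_threads(x + y)
// the value of 'x + y' may arrive as such a pre-initialized temporary, which
// must be emitted before the construct itself is generated.
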
/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of used expressions from the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    if (auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

} // namespace

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV =
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
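      // (Illustrative: a captured 'float' is stored through a 'float *'
      // view of a uintptr_t-sized temporary and then reloaded from that
      // temporary as a uintptr_t, so the runtime can forward every by-copy
      // capture as a pointer-sized value.)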
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), SourceLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), SourceLocation());
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references we need to return the address of the
  // reference instead of the reference to the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
  }

  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType()) {
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  }
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly,
                           StringRef FunctionName)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName) {}
};
} // namespace

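/// Builds the outlined function's argument list from the captured statement,
/// creates the LLVM function, and emits its prologue. Captured fields become
/// arguments (cast to uintptr when FunctionOptions requests it), and
/// LocalAddrs/VLASizes are filled in so the body can address the captures.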
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. VLA type sizes are passed to the outlined
    // function in the same way.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType()) {
      if (FO.UIntPtrCastRequired)
        ArgType = Ctx.getUIntPtrType();
    }

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &Ctx.Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType());
    auto *Arg =
        ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II,
                                  ArgType, ImplicitParamDecl::Other);
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit the function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();

  // Generate the function.
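  // StartFunction emits the standard prologue; the loop below then maps each
  // captured field from its (possibly uintptr-cast) argument back to an
  // address or VLA size that the function body can use directly.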
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.S->getLocStart(), CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    // Do not map arguments if we emit the function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CGF.CreateMemTemp(
            CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
        CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
                              /*Volatile=*/false, CurVD->getType());
        LocalAddr = RefAddr;
      }
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
    LValue ArgLVal =
        CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), BaseInfo);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
                                                          Args[Cnt]->getName(),
                                                          ArgLVal),
                                     FD->getType(), BaseInfo);
      }
      auto *ExprArg =
          CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = CGF.EmitLoadOfReference(
              ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
        } else if (!VarTy->isVariablyModifiedType() ||
                   !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = CGF.EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      LocalAddrs.insert(
          {Args[Cnt],
           {Var,
            FO.UIntPtrCastRequired
                ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
                                       ArgLVal, VarTy->isReferenceType())
                : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
                         .getScalarVal();
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

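/// When full debug info is requested, the captured-statement body is emitted
/// into a separate "<helper-name>_debug__" function that keeps the original
/// parameter types, and the runtime-facing helper becomes a thin wrapper that
/// performs the uintptr casts and forwards its arguments to it.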
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() &&
      CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str());
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      setAddrOfLocalVar(LocalAddrPair.second.first,
                        LocalAddrPair.second.second);
    }
  }
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName());
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV =
          WrapperCGF.MakeAddrLValue(I->second.second, Arg->getType(), BaseInfo);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end())
        CallArg = EI->second.second;
      else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(), BaseInfo);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
      }
    }
    CallArgs.emplace_back(CallArg);
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
                                                  F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
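  // The emitted IR forms a guarded loop over the elements, roughly:
  //   if (dest != dest_end)
  //     do { CopyGen(dest, src); ++dest; ++src; } while (dest != dest_end);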
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

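// Illustrative example: for '#pragma omp parallel firstprivate(a)', a private
// copy of 'a' is allocated in the region and copy-initialized from the
// original variable before the region body runs. Returns true when at least
// one firstprivate variable is also lastprivate on the directive, so callers
// know a final copy-back is still needed.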
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *CapFD = CapturesInfo.lookup(OrigVD);
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check if the current thread is the master thread. If it is,
          // no need to copy data.
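          // (The comparison below checks the addresses of the master copy
          // and this thread's threadprivate copy; they only match on the
          // master thread, which therefore skips the copy.)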
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    // Taskloops do not require additional initialization; it is done in the
    // runtime support library.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [&]() -> Address {
                // Emit private VarDecl with copy init.
                EmitDecl(*VD);
                return GetAddrOfLocalVar(VD);
              });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

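// Illustrative example: for '#pragma omp parallel reduction(+ : s)', a
// private 's' is created below and initialized with the operation's identity
// (0 for '+'); the combination of the private copies into the original 's'
// is emitted later by EmitOMPReductionClauseFinal through the runtime.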
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const auto *IRef : Shareds) {
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    auto Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count),
        [BaseAddr]() -> Address { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    if (isa<OMPArraySectionExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
        return GetAddrOfLocalVar(PrivateVD);
      });
    } else if (isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
        return Builder.CreateElementBitCast(
            GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(
          LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      D.getDirectiveKind() == OMPD_simd;
    bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
    // Emit nowait reduction if the nowait clause is present or the directive
    // is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (auto *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to the
/// outlined parallel function. This is necessary for combined constructs
/// such as 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace

static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

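// Illustrative example: '#pragma omp parallel num_threads(4)' lowers to an
// outlined function plus a __kmpc_fork_call-style runtime invocation; clause
// codegen (copyin, firstprivate, private, reductions) runs inside the
// outlined region emitted below.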
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit implicit barrier to synchronize threads and avoid data races
      // when propagating the master thread's values of threadprivate
      // variables to the local instances of those variables in all other
      // implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values on the current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *U : C->updates())
      EmitIgnoredExpr(U);
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

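// The inner loop is emitted with the canonical block layout
// omp.inner.for.cond -> omp.inner.for.body -> omp.inner.for.inc -> back edge
// to the condition, plus an optional cleanup block staged before the exit.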
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
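  // (Illustrative: for '#pragma omp simd linear(i : 2)', a private 'i' is
  // initialized here from the original variable, per-iteration updates of
  // the form 'i = i.orig + iv * 2' are emitted with the loop body, and the
  // final value is written back in EmitOMPLinearClauseFinal.)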
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      HasLinears = true;
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}

void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
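        // (For example, on x86 the default is typically 16 bytes, rising to
        // 32 with AVX and 64 with AVX-512; the exact value comes from the
        // target via getOpenMPDefaultSimdAlign below.)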
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    (void)LoopScope.addPrivate(VD, [&]() -> Address {
      // Emit var without initialization.
      if (!LocalDeclMap.count(PrivateVD)) {
        auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
        EmitAutoVarCleanups(VarEmission);
      }
      DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      (*I)->getType(), VK_LValue, (*I)->getExprLoc());
      return EmitLValue(&DRE).getAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    }
    ++I;
  }
}

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (auto I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Check that the loop is executed at least one time.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else
        EmitVarDecl(*PrivateVD);
      ++CurPrivate;
    }
  }
}

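// 'simdlen' is a preferred-width hint, while 'safelen' is a correctness
// bound: with a finite safelen the loop is not marked fully parallel, since
// iterations that are at least 'safelen' apart may still carry dependences.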
static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(false);
  }
}

void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/simdlen.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}

void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED)
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      else {
        DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

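// Illustrative example: '#pragma omp simd' lowers to an ordinary IR loop
// annotated with llvm.loop vectorization metadata (vectorize.enable, plus a
// width when simdlen/safelen is present); no runtime calls are involved.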
1479 auto *ThenBB = createBasicBlock(".omp.final.then"); 1480 DoneBB = createBasicBlock(".omp.final.done"); 1481 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1482 EmitBlock(ThenBB); 1483 } 1484 } 1485 Address OrigAddr = Address::invalid(); 1486 if (CED) 1487 OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); 1488 else { 1489 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), 1490 /*RefersToEnclosingVariableOrCapture=*/false, 1491 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); 1492 OrigAddr = EmitLValue(&DRE).getAddress(); 1493 } 1494 OMPPrivateScope VarScope(*this); 1495 VarScope.addPrivate(OrigVD, 1496 [OrigAddr]() -> Address { return OrigAddr; }); 1497 (void)VarScope.Privatize(); 1498 EmitIgnoredExpr(F); 1499 } 1500 ++IC; 1501 ++IPC; 1502 } 1503 if (DoneBB) 1504 EmitBlock(DoneBB, /*IsFinished=*/true); 1505 } 1506 1507 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, 1508 const OMPLoopDirective &S, 1509 CodeGenFunction::JumpDest LoopExit) { 1510 CGF.EmitOMPLoopBody(S, LoopExit); 1511 CGF.EmitStopPoint(&S); 1512 } 1513 1514 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 1515 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1516 OMPLoopScope PreInitScope(CGF, S); 1517 // if (PreCond) { 1518 // for (IV in 0..LastIteration) BODY; 1519 // <Final counter/linear vars updates>; 1520 // } 1521 // 1522 1523 // Emit: if (PreCond) - begin. 1524 // If the condition constant folds and can be elided, avoid emitting the 1525 // whole loop. 1526 bool CondConstant; 1527 llvm::BasicBlock *ContBlock = nullptr; 1528 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1529 if (!CondConstant) 1530 return; 1531 } else { 1532 auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); 1533 ContBlock = CGF.createBasicBlock("simd.if.end"); 1534 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 1535 CGF.getProfileCount(&S)); 1536 CGF.EmitBlock(ThenBlock); 1537 CGF.incrementProfileCounter(&S); 1538 } 1539 1540 // Emit the loop iteration variable. 1541 const Expr *IVExpr = S.getIterationVariable(); 1542 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 1543 CGF.EmitVarDecl(*IVDecl); 1544 CGF.EmitIgnoredExpr(S.getInit()); 1545 1546 // Emit the iterations count variable. 1547 // If it is not a variable, Sema decided to calculate iterations count on 1548 // each iteration (e.g., it is foldable into a constant). 1549 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1550 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1551 // Emit calculation of the iterations count. 1552 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 1553 } 1554 1555 CGF.EmitOMPSimdInit(S); 1556 1557 emitAlignedClause(CGF, S); 1558 (void)CGF.EmitOMPLinearClauseInit(S); 1559 { 1560 OMPPrivateScope LoopScope(CGF); 1561 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 1562 CGF.EmitOMPLinearClause(S, LoopScope); 1563 CGF.EmitOMPPrivateClause(S, LoopScope); 1564 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1565 bool HasLastprivateClause = 1566 CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1567 (void)LoopScope.Privatize(); 1568 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1569 S.getInc(), 1570 [&S](CodeGenFunction &CGF) { 1571 CGF.EmitOMPLoopBody(S, JumpDest()); 1572 CGF.EmitStopPoint(&S); 1573 }, 1574 [](CodeGenFunction &) {}); 1575 CGF.EmitOMPSimdFinal( 1576 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1577 // Emit final copy of the lastprivate variables at the end of loops. 
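      // For example, given '#pragma omp simd lastprivate(x)', the private
      // copy of 'x' from the logically last iteration is copied back to the
      // original variable here; NoFinals is true since EmitOMPSimdFinal above
      // has already emitted the final counter updates.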
1578 if (HasLastprivateClause) 1579 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 1580 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); 1581 emitPostUpdateForReductionClause( 1582 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1583 } 1584 CGF.EmitOMPLinearClauseFinal( 1585 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1586 // Emit: if (PreCond) - end. 1587 if (ContBlock) { 1588 CGF.EmitBranch(ContBlock); 1589 CGF.EmitBlock(ContBlock, true); 1590 } 1591 }; 1592 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1593 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1594 } 1595 1596 void CodeGenFunction::EmitOMPOuterLoop( 1597 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 1598 CodeGenFunction::OMPPrivateScope &LoopScope, 1599 const CodeGenFunction::OMPLoopArguments &LoopArgs, 1600 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 1601 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { 1602 auto &RT = CGM.getOpenMPRuntime(); 1603 1604 const Expr *IVExpr = S.getIterationVariable(); 1605 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1606 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1607 1608 auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 1609 1610 // Start the loop with a block that tests the condition. 1611 auto CondBlock = createBasicBlock("omp.dispatch.cond"); 1612 EmitBlock(CondBlock); 1613 const SourceRange &R = S.getSourceRange(); 1614 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1615 SourceLocToDebugLoc(R.getEnd())); 1616 1617 llvm::Value *BoolCondVal = nullptr; 1618 if (!DynamicOrOrdered) { 1619 // UB = min(UB, GlobalUB) or 1620 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 1621 // 'distribute parallel for') 1622 EmitIgnoredExpr(LoopArgs.EUB); 1623 // IV = LB 1624 EmitIgnoredExpr(LoopArgs.Init); 1625 // IV < UB 1626 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); 1627 } else { 1628 BoolCondVal = 1629 RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL, 1630 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); 1631 } 1632 1633 // If there are any cleanups between here and the loop-exit scope, 1634 // create a block to stage a loop exit along. 1635 auto ExitBlock = LoopExit.getBlock(); 1636 if (LoopScope.requiresCleanups()) 1637 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 1638 1639 auto LoopBody = createBasicBlock("omp.dispatch.body"); 1640 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 1641 if (ExitBlock != LoopExit.getBlock()) { 1642 EmitBlock(ExitBlock); 1643 EmitBranchThroughCleanup(LoopExit); 1644 } 1645 EmitBlock(LoopBody); 1646 1647 // Emit "IV = LB" (in case of static schedule, we have already calculated new 1648 // LB for loop condition and emitted it above). 1649 if (DynamicOrOrdered) 1650 EmitIgnoredExpr(LoopArgs.Init); 1651 1652 // Create a block for the increment. 1653 auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 1654 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1655 1656 // Generate !llvm.loop.parallel metadata for loads and stores for loops 1657 // with dynamic/guided scheduling and without ordered clause. 
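  // Marking the loop as parallel attaches parallel-access metadata to the
  // loads and stores emitted for the body, which later passes (e.g. the loop
  // vectorizer) may use to assume there are no loop-carried dependences.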
1658 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 1659 LoopStack.setParallel(!IsMonotonic); 1660 else 1661 EmitOMPSimdInit(S, IsMonotonic); 1662 1663 SourceLocation Loc = S.getLocStart(); 1664 1665 // when 'distribute' is not combined with a 'for': 1666 // while (idx <= UB) { BODY; ++idx; } 1667 // when 'distribute' is combined with a 'for' 1668 // (e.g. 'distribute parallel for') 1669 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 1670 EmitOMPInnerLoop( 1671 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, 1672 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 1673 CodeGenLoop(CGF, S, LoopExit); 1674 }, 1675 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { 1676 CodeGenOrdered(CGF, Loc, IVSize, IVSigned); 1677 }); 1678 1679 EmitBlock(Continue.getBlock()); 1680 BreakContinueStack.pop_back(); 1681 if (!DynamicOrOrdered) { 1682 // Emit "LB = LB + Stride", "UB = UB + Stride". 1683 EmitIgnoredExpr(LoopArgs.NextLB); 1684 EmitIgnoredExpr(LoopArgs.NextUB); 1685 } 1686 1687 EmitBranch(CondBlock); 1688 LoopStack.pop(); 1689 // Emit the fall-through block. 1690 EmitBlock(LoopExit.getBlock()); 1691 1692 // Tell the runtime we are done. 1693 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { 1694 if (!DynamicOrOrdered) 1695 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), 1696 S.getDirectiveKind()); 1697 }; 1698 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 1699 } 1700 1701 void CodeGenFunction::EmitOMPForOuterLoop( 1702 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 1703 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1704 const OMPLoopArguments &LoopArgs, 1705 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 1706 auto &RT = CGM.getOpenMPRuntime(); 1707 1708 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 1709 const bool DynamicOrOrdered = 1710 Ordered || RT.isDynamic(ScheduleKind.Schedule); 1711 1712 assert((Ordered || 1713 !RT.isStaticNonchunked(ScheduleKind.Schedule, 1714 LoopArgs.Chunk != nullptr)) && 1715 "static non-chunked schedule does not need outer loop"); 1716 1717 // Emit outer loop. 1718 // 1719 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1720 // When schedule(dynamic,chunk_size) is specified, the iterations are 1721 // distributed to threads in the team in chunks as the threads request them. 1722 // Each thread executes a chunk of iterations, then requests another chunk, 1723 // until no chunks remain to be distributed. Each chunk contains chunk_size 1724 // iterations, except for the last chunk to be distributed, which may have 1725 // fewer iterations. When no chunk_size is specified, it defaults to 1. 1726 // 1727 // When schedule(guided,chunk_size) is specified, the iterations are assigned 1728 // to threads in the team in chunks as the executing threads request them. 1729 // Each thread executes a chunk of iterations, then requests another chunk, 1730 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 1731 // each chunk is proportional to the number of unassigned iterations divided 1732 // by the number of threads in the team, decreasing to 1. For a chunk_size 1733 // with value k (greater than 1), the size of each chunk is determined in the 1734 // same way, with the restriction that the chunks do not contain fewer than k 1735 // iterations (except for the last chunk to be assigned, which may have fewer 1736 // than k iterations). 
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
                                                              LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DispatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);
  }

  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}

static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}

void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {

  auto &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as OMPForOuterLoop, except that the schedule cannot be
  // dynamic.
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);

  // For combined 'distribute' and 'for' directives, the increment expression
  // of the distribute loop is stored in DistInc. For 'distribute' alone, it
  // is in Inc.
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();

  // This routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive.
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}

/// Emit a helper variable and return the corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute' chunk lower and upper
  // bounds rather than the whole loop iteration space. These are parameters
  // to the outlined function for 'parallel', and we copy the bounds of the
  // previous schedule into the current ones.
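  // For example, with '#pragma omp distribute parallel for' each team
  // receives a distribute chunk [PrevLB, PrevUB]; the inner 'for'
  // worksharing loop must schedule only that subrange, so its LB/UB helpers
  // are seeded below from the previous (distribute) bounds instead of from
  // [0, LastIteration].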
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(), SourceLocation());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(), SourceLocation());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}

/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided), we need
/// to use the LB and UB expressions generated by the worksharing code
/// generation support, whereas in non-combined situations we would just emit
/// 0 and the LastIteration expression.
/// This function is necessary due to the difference in the LB and UB types
/// for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // When implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop is not
  // normalized, as each team only executes its own assigned distribute chunk.
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
                                            SourceLocation());
  llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
                                            SourceLocation());
  return {LBVal, UBVal};
}

static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  auto LBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));

  auto UBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}

static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &) {
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S, OMPD_for, CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}

void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction
&CGF, PrePostActionTy &) { 1971 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 1972 S.getDistInc()); 1973 }; 1974 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1975 OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for, 1976 /*HasCancel=*/false); 1977 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 1978 /*HasCancel=*/false); 1979 } 1980 1981 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( 1982 const OMPDistributeParallelForSimdDirective &S) { 1983 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1984 CGM.getOpenMPRuntime().emitInlinedDirective( 1985 *this, OMPD_distribute_parallel_for_simd, 1986 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1987 OMPLoopScope PreInitScope(CGF, S); 1988 CGF.EmitStmt( 1989 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1990 }); 1991 } 1992 1993 void CodeGenFunction::EmitOMPDistributeSimdDirective( 1994 const OMPDistributeSimdDirective &S) { 1995 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1996 CGM.getOpenMPRuntime().emitInlinedDirective( 1997 *this, OMPD_distribute_simd, 1998 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1999 OMPLoopScope PreInitScope(CGF, S); 2000 CGF.EmitStmt( 2001 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2002 }); 2003 } 2004 2005 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( 2006 const OMPTargetParallelForSimdDirective &S) { 2007 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2008 CGM.getOpenMPRuntime().emitInlinedDirective( 2009 *this, OMPD_target_parallel_for_simd, 2010 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2011 OMPLoopScope PreInitScope(CGF, S); 2012 CGF.EmitStmt( 2013 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2014 }); 2015 } 2016 2017 void CodeGenFunction::EmitOMPTargetSimdDirective( 2018 const OMPTargetSimdDirective &S) { 2019 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2020 CGM.getOpenMPRuntime().emitInlinedDirective( 2021 *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2022 OMPLoopScope PreInitScope(CGF, S); 2023 CGF.EmitStmt( 2024 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2025 }); 2026 } 2027 2028 void CodeGenFunction::EmitOMPTeamsDistributeDirective( 2029 const OMPTeamsDistributeDirective &S) { 2030 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2031 CGM.getOpenMPRuntime().emitInlinedDirective( 2032 *this, OMPD_teams_distribute, 2033 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2034 OMPLoopScope PreInitScope(CGF, S); 2035 CGF.EmitStmt( 2036 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2037 }); 2038 } 2039 2040 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( 2041 const OMPTeamsDistributeSimdDirective &S) { 2042 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2043 CGM.getOpenMPRuntime().emitInlinedDirective( 2044 *this, OMPD_teams_distribute_simd, 2045 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2046 OMPLoopScope PreInitScope(CGF, S); 2047 CGF.EmitStmt( 2048 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2049 }); 2050 } 2051 2052 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( 2053 const OMPTeamsDistributeParallelForSimdDirective &S) { 2054 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2055 CGM.getOpenMPRuntime().emitInlinedDirective( 2056 *this, OMPD_teams_distribute_parallel_for_simd, 2057 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2058 OMPLoopScope PreInitScope(CGF, S); 2059 CGF.EmitStmt( 2060 
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2061 }); 2062 } 2063 2064 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( 2065 const OMPTeamsDistributeParallelForDirective &S) { 2066 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2067 CGM.getOpenMPRuntime().emitInlinedDirective( 2068 *this, OMPD_teams_distribute_parallel_for, 2069 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2070 OMPLoopScope PreInitScope(CGF, S); 2071 CGF.EmitStmt( 2072 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2073 }); 2074 } 2075 2076 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( 2077 const OMPTargetTeamsDistributeDirective &S) { 2078 CGM.getOpenMPRuntime().emitInlinedDirective( 2079 *this, OMPD_target_teams_distribute, 2080 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2081 CGF.EmitStmt( 2082 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2083 }); 2084 } 2085 2086 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( 2087 const OMPTargetTeamsDistributeParallelForDirective &S) { 2088 CGM.getOpenMPRuntime().emitInlinedDirective( 2089 *this, OMPD_target_teams_distribute_parallel_for, 2090 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2091 CGF.EmitStmt( 2092 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2093 }); 2094 } 2095 2096 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( 2097 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 2098 CGM.getOpenMPRuntime().emitInlinedDirective( 2099 *this, OMPD_target_teams_distribute_parallel_for_simd, 2100 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2101 CGF.EmitStmt( 2102 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2103 }); 2104 } 2105 2106 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( 2107 const OMPTargetTeamsDistributeSimdDirective &S) { 2108 CGM.getOpenMPRuntime().emitInlinedDirective( 2109 *this, OMPD_target_teams_distribute_simd, 2110 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2111 CGF.EmitStmt( 2112 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2113 }); 2114 } 2115 2116 namespace { 2117 struct ScheduleKindModifiersTy { 2118 OpenMPScheduleClauseKind Kind; 2119 OpenMPScheduleClauseModifier M1; 2120 OpenMPScheduleClauseModifier M2; 2121 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, 2122 OpenMPScheduleClauseModifier M1, 2123 OpenMPScheduleClauseModifier M2) 2124 : Kind(Kind), M1(M1), M2(M2) {} 2125 }; 2126 } // namespace 2127 2128 bool CodeGenFunction::EmitOMPWorksharingLoop( 2129 const OMPLoopDirective &S, Expr *EUB, 2130 const CodeGenLoopBoundsTy &CodeGenLoopBounds, 2131 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 2132 // Emit the loop iteration variable. 2133 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2134 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2135 EmitVarDecl(*IVDecl); 2136 2137 // Emit the iterations count variable. 2138 // If it is not a variable, Sema decided to calculate iterations count on each 2139 // iteration (e.g., it is foldable into a constant). 2140 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2141 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2142 // Emit calculation of the iterations count. 2143 EmitIgnoredExpr(S.getCalcLastIteration()); 2144 } 2145 2146 auto &RT = CGM.getOpenMPRuntime(); 2147 2148 bool HasLastprivateClause; 2149 // Check pre-condition. 2150 { 2151 OMPLoopScope PreInitScope(*this, S); 2152 // Skip the entire loop if we don't meet the precondition. 
2153 // If the condition constant folds and can be elided, avoid emitting the 2154 // whole loop. 2155 bool CondConstant; 2156 llvm::BasicBlock *ContBlock = nullptr; 2157 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2158 if (!CondConstant) 2159 return false; 2160 } else { 2161 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2162 ContBlock = createBasicBlock("omp.precond.end"); 2163 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2164 getProfileCount(&S)); 2165 EmitBlock(ThenBlock); 2166 incrementProfileCounter(&S); 2167 } 2168 2169 bool Ordered = false; 2170 if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 2171 if (OrderedClause->getNumForLoops()) 2172 RT.emitDoacrossInit(*this, S); 2173 else 2174 Ordered = true; 2175 } 2176 2177 llvm::DenseSet<const Expr *> EmittedFinals; 2178 emitAlignedClause(*this, S); 2179 bool HasLinears = EmitOMPLinearClauseInit(S); 2180 // Emit helper vars inits. 2181 2182 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); 2183 LValue LB = Bounds.first; 2184 LValue UB = Bounds.second; 2185 LValue ST = 2186 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2187 LValue IL = 2188 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2189 2190 // Emit 'then' code. 2191 { 2192 OMPPrivateScope LoopScope(*this); 2193 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) { 2194 // Emit implicit barrier to synchronize threads and avoid data races on 2195 // initialization of firstprivate variables and post-update of 2196 // lastprivate variables. 2197 CGM.getOpenMPRuntime().emitBarrierCall( 2198 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2199 /*ForceSimpleCall=*/true); 2200 } 2201 EmitOMPPrivateClause(S, LoopScope); 2202 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2203 EmitOMPReductionClauseInit(S, LoopScope); 2204 EmitOMPPrivateLoopCounters(S, LoopScope); 2205 EmitOMPLinearClause(S, LoopScope); 2206 (void)LoopScope.Privatize(); 2207 2208 // Detect the loop schedule kind and chunk. 2209 llvm::Value *Chunk = nullptr; 2210 OpenMPScheduleTy ScheduleKind; 2211 if (auto *C = S.getSingleClause<OMPScheduleClause>()) { 2212 ScheduleKind.Schedule = C->getScheduleKind(); 2213 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2214 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2215 if (const auto *Ch = C->getChunkSize()) { 2216 Chunk = EmitScalarExpr(Ch); 2217 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2218 S.getIterationVariable()->getType(), 2219 S.getLocStart()); 2220 } 2221 } 2222 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2223 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2224 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2225 // If the static schedule kind is specified or if the ordered clause is 2226 // specified, and if no monotonic modifier is specified, the effect will 2227 // be as if the monotonic modifier was specified. 2228 if (RT.isStaticNonchunked(ScheduleKind.Schedule, 2229 /* Chunked */ Chunk != nullptr) && 2230 !Ordered) { 2231 if (isOpenMPSimdDirective(S.getDirectiveKind())) 2232 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2233 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2234 // When no chunk_size is specified, the iteration space is divided into 2235 // chunks that are approximately equal in size, and at most one chunk is 2236 // distributed to each thread. Note that the size of the chunks is 2237 // unspecified in this case. 
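        // A rough sketch of what is emitted below (the actual runtime entry
        // point depends on the size and signedness of the iteration variable,
        // e.g. __kmpc_for_static_init_4 for a signed 32-bit IV):
        //   __kmpc_for_static_init_4(&loc, tid, kmp_sch_static,
        //                            &IL, &LB, &UB, &ST, /*incr=*/1,
        //                            /*chunk=*/1);
        //   UB = min(UB, GlobalUB); IV = LB;
        //   while (IV <= UB) { BODY; ++IV; }
        //   __kmpc_for_static_fini(&loc, tid);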
2238 CGOpenMPRuntime::StaticRTInput StaticInit( 2239 IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(), 2240 UB.getAddress(), ST.getAddress()); 2241 RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(), 2242 ScheduleKind, StaticInit); 2243 auto LoopExit = 2244 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2245 // UB = min(UB, GlobalUB); 2246 EmitIgnoredExpr(S.getEnsureUpperBound()); 2247 // IV = LB; 2248 EmitIgnoredExpr(S.getInit()); 2249 // while (idx <= UB) { BODY; ++idx; } 2250 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 2251 S.getInc(), 2252 [&S, LoopExit](CodeGenFunction &CGF) { 2253 CGF.EmitOMPLoopBody(S, LoopExit); 2254 CGF.EmitStopPoint(&S); 2255 }, 2256 [](CodeGenFunction &) {}); 2257 EmitBlock(LoopExit.getBlock()); 2258 // Tell the runtime we are done. 2259 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2260 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), 2261 S.getDirectiveKind()); 2262 }; 2263 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2264 } else { 2265 const bool IsMonotonic = 2266 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || 2267 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || 2268 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 2269 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 2270 // Emit the outer loop, which requests its work chunk [LB..UB] from 2271 // runtime and runs the inner loop to process it. 2272 const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), 2273 ST.getAddress(), IL.getAddress(), 2274 Chunk, EUB); 2275 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 2276 LoopArguments, CGDispatchBounds); 2277 } 2278 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2279 EmitOMPSimdFinal(S, 2280 [&](CodeGenFunction &CGF) -> llvm::Value * { 2281 return CGF.Builder.CreateIsNotNull( 2282 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2283 }); 2284 } 2285 EmitOMPReductionClauseFinal( 2286 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 2287 ? /*Parallel and Simd*/ OMPD_parallel_for_simd 2288 : /*Parallel only*/ OMPD_parallel); 2289 // Emit post-update of the reduction variables if IsLastIter != 0. 2290 emitPostUpdateForReductionClause( 2291 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2292 return CGF.Builder.CreateIsNotNull( 2293 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2294 }); 2295 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2296 if (HasLastprivateClause) 2297 EmitOMPLastprivateClauseFinal( 2298 S, isOpenMPSimdDirective(S.getDirectiveKind()), 2299 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 2300 } 2301 EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2302 return CGF.Builder.CreateIsNotNull( 2303 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2304 }); 2305 // We're now done with the loop, so jump to the continuation block. 2306 if (ContBlock) { 2307 EmitBranch(ContBlock); 2308 EmitBlock(ContBlock, true); 2309 } 2310 } 2311 return HasLastprivateClause; 2312 } 2313 2314 /// The following two functions generate expressions for the loop lower 2315 /// and upper bounds in case of static and dynamic (dispatch) schedule 2316 /// of the associated 'for' or 'distribute' loop. 
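/// For a plain (non-combined) worksharing loop, the static variant below
/// emits the LB/UB helper variables created by Sema, whereas the dispatch
/// variant simply returns 0 and the iteration count as values.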
2317 static std::pair<LValue, LValue> 2318 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 2319 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2320 LValue LB = 2321 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2322 LValue UB = 2323 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2324 return {LB, UB}; 2325 } 2326 2327 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not 2328 /// consider the lower and upper bound expressions generated by the 2329 /// worksharing loop support, but we use 0 and the iteration space size as 2330 /// constants 2331 static std::pair<llvm::Value *, llvm::Value *> 2332 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, 2333 Address LB, Address UB) { 2334 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2335 const Expr *IVExpr = LS.getIterationVariable(); 2336 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); 2337 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); 2338 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); 2339 return {LBVal, UBVal}; 2340 } 2341 2342 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 2343 bool HasLastprivates = false; 2344 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2345 PrePostActionTy &) { 2346 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 2347 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2348 emitForLoopBounds, 2349 emitDispatchForLoopBounds); 2350 }; 2351 { 2352 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2353 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 2354 S.hasCancel()); 2355 } 2356 2357 // Emit an implicit barrier at the end. 2358 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2359 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2360 } 2361 } 2362 2363 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 2364 bool HasLastprivates = false; 2365 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2366 PrePostActionTy &) { 2367 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2368 emitForLoopBounds, 2369 emitDispatchForLoopBounds); 2370 }; 2371 { 2372 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2373 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2374 } 2375 2376 // Emit an implicit barrier at the end. 2377 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2378 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2379 } 2380 } 2381 2382 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 2383 const Twine &Name, 2384 llvm::Value *Init = nullptr) { 2385 auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 2386 if (Init) 2387 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); 2388 return LVal; 2389 } 2390 2391 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 2392 auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); 2393 auto *CS = dyn_cast<CompoundStmt>(Stmt); 2394 bool HasLastprivates = false; 2395 auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF, 2396 PrePostActionTy &) { 2397 auto &C = CGF.CGM.getContext(); 2398 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2399 // Emit helper vars inits. 
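    // For example,
    //   #pragma omp sections
    //   {
    //     #pragma omp section
    //     foo();
    //     #pragma omp section
    //     bar();
    //   }
    // is lowered as a static worksharing loop over the section indices 0..1
    // (so GlobalUB is CS->size() - 1) whose body switches on the index; see
    // BodyGen below.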
2400 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 2401 CGF.Builder.getInt32(0)); 2402 auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1) 2403 : CGF.Builder.getInt32(0); 2404 LValue UB = 2405 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 2406 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 2407 CGF.Builder.getInt32(1)); 2408 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 2409 CGF.Builder.getInt32(0)); 2410 // Loop counter. 2411 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 2412 OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2413 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 2414 OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2415 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 2416 // Generate condition for loop. 2417 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 2418 OK_Ordinary, S.getLocStart(), FPOptions()); 2419 // Increment for loop counter. 2420 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 2421 S.getLocStart()); 2422 auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) { 2423 // Iterate through all sections and emit a switch construct: 2424 // switch (IV) { 2425 // case 0: 2426 // <SectionStmt[0]>; 2427 // break; 2428 // ... 2429 // case <NumSection> - 1: 2430 // <SectionStmt[<NumSection> - 1]>; 2431 // break; 2432 // } 2433 // .omp.sections.exit: 2434 auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 2435 auto *SwitchStmt = CGF.Builder.CreateSwitch( 2436 CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, 2437 CS == nullptr ? 1 : CS->size()); 2438 if (CS) { 2439 unsigned CaseNumber = 0; 2440 for (auto *SubStmt : CS->children()) { 2441 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2442 CGF.EmitBlock(CaseBB); 2443 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 2444 CGF.EmitStmt(SubStmt); 2445 CGF.EmitBranch(ExitBB); 2446 ++CaseNumber; 2447 } 2448 } else { 2449 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2450 CGF.EmitBlock(CaseBB); 2451 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 2452 CGF.EmitStmt(Stmt); 2453 CGF.EmitBranch(ExitBB); 2454 } 2455 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 2456 }; 2457 2458 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2459 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 2460 // Emit implicit barrier to synchronize threads and avoid data races on 2461 // initialization of firstprivate variables and post-update of lastprivate 2462 // variables. 2463 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 2464 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2465 /*ForceSimpleCall=*/true); 2466 } 2467 CGF.EmitOMPPrivateClause(S, LoopScope); 2468 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2469 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2470 (void)LoopScope.Privatize(); 2471 2472 // Emit static non-chunked loop. 
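    // Sections always use a static, non-chunked schedule over the section
    // indices, so each thread receives at most one contiguous range of
    // sections.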
2473 OpenMPScheduleTy ScheduleKind; 2474 ScheduleKind.Schedule = OMPC_SCHEDULE_static; 2475 CGOpenMPRuntime::StaticRTInput StaticInit( 2476 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), 2477 LB.getAddress(), UB.getAddress(), ST.getAddress()); 2478 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 2479 CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit); 2480 // UB = min(UB, GlobalUB); 2481 auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); 2482 auto *MinUBGlobalUB = CGF.Builder.CreateSelect( 2483 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 2484 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 2485 // IV = LB; 2486 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); 2487 // while (idx <= UB) { BODY; ++idx; } 2488 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, 2489 [](CodeGenFunction &) {}); 2490 // Tell the runtime we are done. 2491 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2492 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), 2493 S.getDirectiveKind()); 2494 }; 2495 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); 2496 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 2497 // Emit post-update of the reduction variables if IsLastIter != 0. 2498 emitPostUpdateForReductionClause( 2499 CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2500 return CGF.Builder.CreateIsNotNull( 2501 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2502 }); 2503 2504 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2505 if (HasLastprivates) 2506 CGF.EmitOMPLastprivateClauseFinal( 2507 S, /*NoFinals=*/false, 2508 CGF.Builder.CreateIsNotNull( 2509 CGF.EmitLoadOfScalar(IL, S.getLocStart()))); 2510 }; 2511 2512 bool HasCancel = false; 2513 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 2514 HasCancel = OSD->hasCancel(); 2515 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 2516 HasCancel = OPSD->hasCancel(); 2517 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); 2518 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 2519 HasCancel); 2520 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 2521 // clause. Otherwise the barrier will be generated by the codegen for the 2522 // directive. 2523 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 2524 // Emit implicit barrier to synchronize threads and avoid data races on 2525 // initialization of firstprivate variables. 2526 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2527 OMPD_unknown); 2528 } 2529 } 2530 2531 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 2532 { 2533 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2534 EmitSections(S); 2535 } 2536 // Emit an implicit barrier at the end. 
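  // OpenMP specifies an implicit barrier at the end of a 'sections' construct
  // unless a 'nowait' clause is present.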
2537 if (!S.getSingleClause<OMPNowaitClause>()) { 2538 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2539 OMPD_sections); 2540 } 2541 } 2542 2543 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 2544 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2545 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2546 }; 2547 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2548 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 2549 S.hasCancel()); 2550 } 2551 2552 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 2553 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 2554 llvm::SmallVector<const Expr *, 8> DestExprs; 2555 llvm::SmallVector<const Expr *, 8> SrcExprs; 2556 llvm::SmallVector<const Expr *, 8> AssignmentOps; 2557 // Check if there are any 'copyprivate' clauses associated with this 2558 // 'single' construct. 2559 // Build a list of copyprivate variables along with helper expressions 2560 // (<source>, <destination>, <destination>=<source> expressions) 2561 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 2562 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 2563 DestExprs.append(C->destination_exprs().begin(), 2564 C->destination_exprs().end()); 2565 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 2566 AssignmentOps.append(C->assignment_ops().begin(), 2567 C->assignment_ops().end()); 2568 } 2569 // Emit code for 'single' region along with 'copyprivate' clauses 2570 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2571 Action.Enter(CGF); 2572 OMPPrivateScope SingleScope(CGF); 2573 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 2574 CGF.EmitOMPPrivateClause(S, SingleScope); 2575 (void)SingleScope.Privatize(); 2576 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2577 }; 2578 { 2579 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2580 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 2581 CopyprivateVars, DestExprs, 2582 SrcExprs, AssignmentOps); 2583 } 2584 // Emit an implicit barrier at the end (to avoid data race on firstprivate 2585 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 2586 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 2587 CGM.getOpenMPRuntime().emitBarrierCall( 2588 *this, S.getLocStart(), 2589 S.getSingleClause<OMPNowaitClause>() ? 
OMPD_unknown : OMPD_single); 2590 } 2591 } 2592 2593 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 2594 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2595 Action.Enter(CGF); 2596 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2597 }; 2598 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2599 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 2600 } 2601 2602 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 2603 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2604 Action.Enter(CGF); 2605 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2606 }; 2607 Expr *Hint = nullptr; 2608 if (auto *HintClause = S.getSingleClause<OMPHintClause>()) 2609 Hint = HintClause->getHint(); 2610 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2611 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 2612 S.getDirectiveName().getAsString(), 2613 CodeGen, S.getLocStart(), Hint); 2614 } 2615 2616 void CodeGenFunction::EmitOMPParallelForDirective( 2617 const OMPParallelForDirective &S) { 2618 // Emit directive as a combined directive that consists of two implicit 2619 // directives: 'parallel' with 'for' directive. 2620 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2621 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); 2622 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2623 emitDispatchForLoopBounds); 2624 }; 2625 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, 2626 emitEmptyBoundParameters); 2627 } 2628 2629 void CodeGenFunction::EmitOMPParallelForSimdDirective( 2630 const OMPParallelForSimdDirective &S) { 2631 // Emit directive as a combined directive that consists of two implicit 2632 // directives: 'parallel' with 'for' directive. 2633 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2634 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2635 emitDispatchForLoopBounds); 2636 }; 2637 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, 2638 emitEmptyBoundParameters); 2639 } 2640 2641 void CodeGenFunction::EmitOMPParallelSectionsDirective( 2642 const OMPParallelSectionsDirective &S) { 2643 // Emit directive as a combined directive that consists of two implicit 2644 // directives: 'parallel' with 'sections' directive. 2645 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2646 CGF.EmitSections(S); 2647 }; 2648 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, 2649 emitEmptyBoundParameters); 2650 } 2651 2652 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, 2653 const RegionCodeGenTy &BodyGen, 2654 const TaskGenTy &TaskGen, 2655 OMPTaskDataTy &Data) { 2656 // Emit outlined function for task construct. 2657 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2658 auto *I = CS->getCapturedDecl()->param_begin(); 2659 auto *PartId = std::next(I); 2660 auto *TaskT = std::next(I, 4); 2661 // Check if the task is final 2662 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 2663 // If the condition constant folds and can be elided, try to avoid emitting 2664 // the condition and the dead arm of the if/else. 
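    // For example, 'final(1)' folds to a constant, so Data.Final can carry
    // the boolean directly rather than a value computed at run time.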
2665 auto *Cond = Clause->getCondition(); 2666 bool CondConstant; 2667 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 2668 Data.Final.setInt(CondConstant); 2669 else 2670 Data.Final.setPointer(EvaluateExprAsBool(Cond)); 2671 } else { 2672 // By default the task is not final. 2673 Data.Final.setInt(/*IntVal=*/false); 2674 } 2675 // Check if the task has 'priority' clause. 2676 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 2677 auto *Prio = Clause->getPriority(); 2678 Data.Priority.setInt(/*IntVal=*/true); 2679 Data.Priority.setPointer(EmitScalarConversion( 2680 EmitScalarExpr(Prio), Prio->getType(), 2681 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 2682 Prio->getExprLoc())); 2683 } 2684 // The first function argument for tasks is a thread id, the second one is a 2685 // part id (0 for tied tasks, >=0 for untied task). 2686 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 2687 // Get list of private variables. 2688 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 2689 auto IRef = C->varlist_begin(); 2690 for (auto *IInit : C->private_copies()) { 2691 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2692 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2693 Data.PrivateVars.push_back(*IRef); 2694 Data.PrivateCopies.push_back(IInit); 2695 } 2696 ++IRef; 2697 } 2698 } 2699 EmittedAsPrivate.clear(); 2700 // Get list of firstprivate variables. 2701 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 2702 auto IRef = C->varlist_begin(); 2703 auto IElemInitRef = C->inits().begin(); 2704 for (auto *IInit : C->private_copies()) { 2705 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2706 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2707 Data.FirstprivateVars.push_back(*IRef); 2708 Data.FirstprivateCopies.push_back(IInit); 2709 Data.FirstprivateInits.push_back(*IElemInitRef); 2710 } 2711 ++IRef; 2712 ++IElemInitRef; 2713 } 2714 } 2715 // Get list of lastprivate variables (for taskloops). 
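  // A plain 'task' cannot carry 'lastprivate'; this path is taken by
  // 'taskloop' and its combined variants, which are funneled through this
  // function as well.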
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Data.ReductionVars.emplace_back(Ref);
      Data.ReductionCopies.emplace_back(*IPriv);
      Data.ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getLocStart(), LHSs, RHSs, Data);
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (auto *IRef : C->varlists())
      Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      auto *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
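      // The copy function is invoked roughly as
      //   CopyFn(PrivatesPtr, &.priv.ptr.addr, &.firstpriv.ptr.addr, ...);
      // it fills each out-pointer with the address of the corresponding
      // private copy inside the task's privates block; those addresses are
      // loaded and registered in the privatization scope below.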
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (auto *E : Data.PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.LastprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
                                                          CopyFn, CallArgs);
      for (auto &&Pair : LastprivateDstsOrigs) {
        auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    if (Data.Reductions) {
      OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
                             Data.ReductionOps);
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement =
            Address(CGF.EmitScalarConversion(
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                        CGF.getContext().getPointerType(
                            Data.ReductionCopies[Cnt]->getType()),
                        SourceLocation()),
                    Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                         [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
                                                           RedCG, Cnt);
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const auto *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate
        // and privatized already during processing of the firstprivates.
        llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
            CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                SourceLocation()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
                              [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
                                                           RedCG, Cnt);
      }
    }
    (void)InRedScope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
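  // A minimal sketch of the source pattern this checks (hypothetical):
  //   #pragma omp task untied
  //   { ... }
  // An OMPUntiedClause on the directive makes the task untied below.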
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        auto IPriv = C->privates().begin();
        auto IRed = C->reduction_ops().begin();
        auto ILHS = C->lhs_exprs().begin();
        auto IRHS = C->rhs_exprs().begin();
        for (const auto *Ref : C->varlists()) {
          Data.ReductionVars.emplace_back(Ref);
          Data.ReductionCopies.emplace_back(*IPriv);
          Data.ReductionOps.emplace_back(*IRed);
          LHSs.emplace_back(*ILHS);
          RHSs.emplace_back(*IRHS);
          std::advance(IPriv, 1);
          std::advance(IRed, 1);
          std::advance(ILHS, 1);
          std::advance(IRHS, 1);
        }
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(
              CGF, S.getLocStart(), LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
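      // For example (hypothetical source):
      //   #pragma omp distribute dist_schedule(static, 4)
      // yields ScheduleKind == OMPC_DIST_SCHEDULE_static with a non-null
      // Chunk, selecting the chunked (outer loop) path below; without a
      // chunk_size the static non-chunked path is used instead.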
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
            LB.getAddress(), UB.getAddress(), ST.getAddress());
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                                    StaticInit);
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                         ? S.getCombinedCond()
                         : S.getCond();

        // For 'distribute' alone, codegen
        //   while (idx <= UB) { BODY; ++idx; }
        // When combined with 'for' (e.g. as in 'distribute parallel for'),
        //   while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
                         [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                           CodeGenLoop(CGF, S, LoopExit);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // the runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }

      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}

void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
                                              false);
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
  Fn->addFnAttr(llvm::Attribute::NoInline);
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (!S.getAssociatedStmt()) {
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    if (C) {
      auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
}

static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
                                        Loc)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
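    // E.g., converting a 'double' to '_Complex float' turns the scalar into
    // a 'float' real part and pairs it with a zero imaginary part.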
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                              DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  } else {
    CGF.EmitAtomicStore(RVal, LVal,
                        IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                 : llvm::AtomicOrdering::Monotonic,
                        LVal.isVolatile(), /*IsInit=*/false);
  }
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(
                         XLValue, Loc,
                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                  : llvm::AtomicOrdering::Monotonic,
                         XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
}

static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
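  // E.g. (hypothetical source):
  //   #pragma omp atomic write seq_cst
  //   x = expr;
  // emits the atomic store above followed by the runtime flush below.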
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomics are supported for the given
  // type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  //   x binop= expr;    -> xrval binop expr;
  //   x++, ++x          -> xrval + 1;
  //   x--, --x          -> xrval - 1;
  //   x = x binop expr; -> xrval binop expr;
  //   x = expr Op x;    -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  //   x binop= expr;    -> xrval binop expr;
  //   x++, ++x          -> xrval + 1;
  //   x--, --x          -> xrval - 1;
  //   x = x binop expr; -> xrval binop expr;
  //   x = expr Op x;    -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    //   x binop= expr;    -> xrval binop expr;
    //   x++, ++x          -> xrval + 1;
    //   x--, --x          -> xrval - 1;
    //   x = x binop expr; -> xrval binop expr;
    //   x = expr Op x;    -> expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide the new value, so evaluate it using
        // the old value of 'x'.
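        // E.g., for 'v = ++x' lowered to 'atomicrmw add', the instruction
        // returns the old value of 'x', so the captured value is recomputed
        // here by re-evaluating the update expression over that old value.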
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find the first clause (skip the seq_cst clause, if it is the first one).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
    enterFullExpression(EWC);
  }
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const auto *C : Compound->body()) {
      if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
        enterFullExpression(EWC);
      }
    }
  }

  auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.EmitStopPoint(CS);
    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
                      S.getV(), S.getExpr(), S.getUpdateExpr(),
                      S.isXLHSInRHSPart(), S.getLocStart());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;
  const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());

  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for at most one 'if' clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
    Device = C->getDevice();
  }

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        CapturedVars);
}

static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();

  Action.Enter(CGF);
  CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
  const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
    Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getLocStart());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
                                           CapturedVars);
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
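  // A sketch of what reaches this path (hypothetical source):
  //   #pragma omp teams num_teams(4) thread_limit(64)
  // The num_teams/thread_limit clauses are handled in
  // emitCommonOMPTeamsDirective above via emitNumTeamsClause; the lambda
  // below only emits privatization, the body, and the reductions.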
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

static void emitTargetTeamsRegion(CodeGenFunction &CGF,
                                  PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for);
  return OMPCancelStack.getExitBlock();
}

void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (auto PvtVarIt : C.private_copies()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it will be initialized by the
      // declaration we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable served its purpose in the emission of
      // the previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}

// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code
  // generation to deactivate privatization.
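  // A sketch of the pattern this supports (hypothetical source):
  //   int *p = ...;
  //   #pragma omp target data map(tofrom: p[0:N]) use_device_ptr(p)
  //   { ... }  // inside the region, 'p' is privatized to hold the
  //            // device address obtained from the runtime.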
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers
      // if needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else
        RCG(CGF);
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Even though the body of the region is emitted as an inlined directive,
    // we don't use an inline scope, as changes to the references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  auto *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(
      CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  // TODO: codegen for target parallel for.
}

/// Map the given loop helper variable to the corresponding parameter of the
/// outlined function, registering it in the given private scope.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(
      VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
}

void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup)
    EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
  else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
        },
        S.getLocStart());
  }
}

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}