1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit OpenMP nodes as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCleanup.h" 15 #include "CGOpenMPRuntime.h" 16 #include "CodeGenFunction.h" 17 #include "CodeGenModule.h" 18 #include "TargetInfo.h" 19 #include "clang/AST/Stmt.h" 20 #include "clang/AST/StmtOpenMP.h" 21 #include "clang/AST/DeclOpenMP.h" 22 #include "llvm/IR/CallSite.h" 23 using namespace clang; 24 using namespace CodeGen; 25 26 namespace { 27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen 28 /// for captured expressions. 29 class OMPLexicalScope : public CodeGenFunction::LexicalScope { 30 void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 31 for (const auto *C : S.clauses()) { 32 if (auto *CPI = OMPClauseWithPreInit::get(C)) { 33 if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { 34 for (const auto *I : PreInit->decls()) { 35 if (!I->hasAttr<OMPCaptureNoInitAttr>()) 36 CGF.EmitVarDecl(cast<VarDecl>(*I)); 37 else { 38 CodeGenFunction::AutoVarEmission Emission = 39 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 40 CGF.EmitAutoVarCleanups(Emission); 41 } 42 } 43 } 44 } 45 } 46 } 47 CodeGenFunction::OMPPrivateScope InlinedShareds; 48 49 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { 50 return CGF.LambdaCaptureFields.lookup(VD) || 51 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || 52 (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl)); 53 } 54 55 public: 56 OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S, 57 bool AsInlined = false, bool EmitPreInitStmt = true) 58 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), 59 InlinedShareds(CGF) { 60 if (EmitPreInitStmt) 61 emitPreInitStmt(CGF, S); 62 if (AsInlined) { 63 if (S.hasAssociatedStmt()) { 64 auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); 65 for (auto &C : CS->captures()) { 66 if (C.capturesVariable() || C.capturesVariableByCopy()) { 67 auto *VD = C.getCapturedVar(); 68 assert(VD == VD->getCanonicalDecl() && 69 "Canonical decl must be captured."); 70 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 71 isCapturedVar(CGF, VD) || 72 (CGF.CapturedStmtInfo && 73 InlinedShareds.isGlobalVarCaptured(VD)), 74 VD->getType().getNonReferenceType(), VK_LValue, 75 SourceLocation()); 76 InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { 77 return CGF.EmitLValue(&DRE).getAddress(); 78 }); 79 } 80 } 81 (void)InlinedShareds.Privatize(); 82 } 83 } 84 } 85 }; 86 87 /// Lexical scope for OpenMP parallel construct, that handles correct codegen 88 /// for captured expressions. 89 class OMPParallelScope final : public OMPLexicalScope { 90 bool EmitPreInitStmt(const OMPExecutableDirective &S) { 91 OpenMPDirectiveKind Kind = S.getDirectiveKind(); 92 return !(isOpenMPTargetExecutionDirective(Kind) || 93 isOpenMPLoopBoundSharingDirective(Kind)) && 94 isOpenMPParallelDirective(Kind); 95 } 96 97 public: 98 OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) 99 : OMPLexicalScope(CGF, S, 100 /*AsInlined=*/false, 101 /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {} 102 }; 103 104 /// Lexical scope for OpenMP teams construct, that handles correct codegen 105 /// for captured expressions. 106 class OMPTeamsScope final : public OMPLexicalScope { 107 bool EmitPreInitStmt(const OMPExecutableDirective &S) { 108 OpenMPDirectiveKind Kind = S.getDirectiveKind(); 109 return !isOpenMPTargetExecutionDirective(Kind) && 110 isOpenMPTeamsDirective(Kind); 111 } 112 113 public: 114 OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) 115 : OMPLexicalScope(CGF, S, 116 /*AsInlined=*/false, 117 /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {} 118 }; 119 120 /// Private scope for OpenMP loop-based directives, that supports capturing 121 /// of used expression from loop statement. 122 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { 123 void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { 124 if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) { 125 if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) { 126 for (const auto *I : PreInits->decls()) 127 CGF.EmitVarDecl(cast<VarDecl>(*I)); 128 } 129 } 130 } 131 132 public: 133 OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S) 134 : CodeGenFunction::RunCleanupsScope(CGF) { 135 emitPreInitStmt(CGF, S); 136 } 137 }; 138 139 } // namespace 140 141 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, 142 const OMPExecutableDirective &S, 143 const RegionCodeGenTy &CodeGen); 144 145 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) { 146 if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) { 147 if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) { 148 OrigVD = OrigVD->getCanonicalDecl(); 149 bool IsCaptured = 150 LambdaCaptureFields.lookup(OrigVD) || 151 (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) || 152 (CurCodeDecl && isa<BlockDecl>(CurCodeDecl)); 153 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured, 154 OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc()); 155 return EmitLValue(&DRE); 156 } 157 } 158 return EmitLValue(E); 159 } 160 161 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { 162 auto &C = getContext(); 163 llvm::Value *Size = nullptr; 164 auto SizeInChars = C.getTypeSizeInChars(Ty); 165 if (SizeInChars.isZero()) { 166 // getTypeSizeInChars() returns 0 for a VLA. 167 while (auto *VAT = C.getAsVariableArrayType(Ty)) { 168 llvm::Value *ArraySize; 169 std::tie(ArraySize, Ty) = getVLASize(VAT); 170 Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize; 171 } 172 SizeInChars = C.getTypeSizeInChars(Ty); 173 if (SizeInChars.isZero()) 174 return llvm::ConstantInt::get(SizeTy, /*V=*/0); 175 Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); 176 } else 177 Size = CGM.getSize(SizeInChars); 178 return Size; 179 } 180 181 void CodeGenFunction::GenerateOpenMPCapturedVars( 182 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { 183 const RecordDecl *RD = S.getCapturedRecordDecl(); 184 auto CurField = RD->field_begin(); 185 auto CurCap = S.captures().begin(); 186 for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), 187 E = S.capture_init_end(); 188 I != E; ++I, ++CurField, ++CurCap) { 189 if (CurField->hasCapturedVLAType()) { 190 auto VAT = CurField->getCapturedVLAType(); 191 auto *Val = VLASizeMap[VAT->getSizeExpr()]; 192 CapturedVars.push_back(Val); 193 } else if (CurCap->capturesThis()) 194 CapturedVars.push_back(CXXThisValue); 195 else if (CurCap->capturesVariableByCopy()) { 196 llvm::Value *CV = 197 EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal(); 198 199 // If the field is not a pointer, we need to save the actual value 200 // and load it as a void pointer. 201 if (!CurField->getType()->isAnyPointerType()) { 202 auto &Ctx = getContext(); 203 auto DstAddr = CreateMemTemp( 204 Ctx.getUIntPtrType(), 205 Twine(CurCap->getCapturedVar()->getName()) + ".casted"); 206 LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); 207 208 auto *SrcAddrVal = EmitScalarConversion( 209 DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), 210 Ctx.getPointerType(CurField->getType()), SourceLocation()); 211 LValue SrcLV = 212 MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType()); 213 214 // Store the value using the source type pointer. 215 EmitStoreThroughLValue(RValue::get(CV), SrcLV); 216 217 // Load the value using the destination type pointer. 218 CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal(); 219 } 220 CapturedVars.push_back(CV); 221 } else { 222 assert(CurCap->capturesVariable() && "Expected capture by reference."); 223 CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer()); 224 } 225 } 226 } 227 228 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType, 229 StringRef Name, LValue AddrLV, 230 bool isReferenceType = false) { 231 ASTContext &Ctx = CGF.getContext(); 232 233 auto *CastedPtr = CGF.EmitScalarConversion( 234 AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(), 235 Ctx.getPointerType(DstType), SourceLocation()); 236 auto TmpAddr = 237 CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) 238 .getAddress(); 239 240 // If we are dealing with references we need to return the address of the 241 // reference instead of the reference of the value. 242 if (isReferenceType) { 243 QualType RefType = Ctx.getLValueReferenceType(DstType); 244 auto *RefVal = TmpAddr.getPointer(); 245 TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref"); 246 auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType); 247 CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true); 248 } 249 250 return TmpAddr; 251 } 252 253 static QualType getCanonicalParamType(ASTContext &C, QualType T) { 254 if (T->isLValueReferenceType()) { 255 return C.getLValueReferenceType( 256 getCanonicalParamType(C, T.getNonReferenceType()), 257 /*SpelledAsLValue=*/false); 258 } 259 if (T->isPointerType()) 260 return C.getPointerType(getCanonicalParamType(C, T->getPointeeType())); 261 if (auto *A = T->getAsArrayTypeUnsafe()) { 262 if (auto *VLA = dyn_cast<VariableArrayType>(A)) 263 return getCanonicalParamType(C, VLA->getElementType()); 264 else if (!A->isVariablyModifiedType()) 265 return C.getCanonicalType(T); 266 } 267 return C.getCanonicalParamType(T); 268 } 269 270 namespace { 271 /// Contains required data for proper outlined function codegen. 272 struct FunctionOptions { 273 /// Captured statement for which the function is generated. 274 const CapturedStmt *S = nullptr; 275 /// true if cast to/from UIntPtr is required for variables captured by 276 /// value. 277 const bool UIntPtrCastRequired = true; 278 /// true if only casted arguments must be registered as local args or VLA 279 /// sizes. 280 const bool RegisterCastedArgsOnly = false; 281 /// Name of the generated function. 282 const StringRef FunctionName; 283 explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, 284 bool RegisterCastedArgsOnly, 285 StringRef FunctionName) 286 : S(S), UIntPtrCastRequired(UIntPtrCastRequired), 287 RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), 288 FunctionName(FunctionName) {} 289 }; 290 } 291 292 static llvm::Function *emitOutlinedFunctionPrologue( 293 CodeGenFunction &CGF, FunctionArgList &Args, 294 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> 295 &LocalAddrs, 296 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> 297 &VLASizes, 298 llvm::Value *&CXXThisValue, const FunctionOptions &FO) { 299 const CapturedDecl *CD = FO.S->getCapturedDecl(); 300 const RecordDecl *RD = FO.S->getCapturedRecordDecl(); 301 assert(CD->hasBody() && "missing CapturedDecl body"); 302 303 CXXThisValue = nullptr; 304 // Build the argument list. 305 CodeGenModule &CGM = CGF.CGM; 306 ASTContext &Ctx = CGM.getContext(); 307 FunctionArgList TargetArgs; 308 Args.append(CD->param_begin(), 309 std::next(CD->param_begin(), CD->getContextParamPosition())); 310 TargetArgs.append( 311 CD->param_begin(), 312 std::next(CD->param_begin(), CD->getContextParamPosition())); 313 auto I = FO.S->captures().begin(); 314 for (auto *FD : RD->fields()) { 315 QualType ArgType = FD->getType(); 316 IdentifierInfo *II = nullptr; 317 VarDecl *CapVar = nullptr; 318 319 // If this is a capture by copy and the type is not a pointer, the outlined 320 // function argument type should be uintptr and the value properly casted to 321 // uintptr. This is necessary given that the runtime library is only able to 322 // deal with pointers. We can pass in the same way the VLA type sizes to the 323 // outlined function. 324 if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || 325 I->capturesVariableArrayType()) { 326 if (FO.UIntPtrCastRequired) 327 ArgType = Ctx.getUIntPtrType(); 328 } 329 330 if (I->capturesVariable() || I->capturesVariableByCopy()) { 331 CapVar = I->getCapturedVar(); 332 II = CapVar->getIdentifier(); 333 } else if (I->capturesThis()) 334 II = &Ctx.Idents.get("this"); 335 else { 336 assert(I->capturesVariableArrayType()); 337 II = &Ctx.Idents.get("vla"); 338 } 339 if (ArgType->isVariablyModifiedType()) 340 ArgType = getCanonicalParamType(Ctx, ArgType); 341 auto *Arg = 342 ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II, 343 ArgType, ImplicitParamDecl::Other); 344 Args.emplace_back(Arg); 345 // Do not cast arguments if we emit function with non-original types. 346 TargetArgs.emplace_back( 347 FO.UIntPtrCastRequired 348 ? Arg 349 : CGM.getOpenMPRuntime().translateParameter(FD, Arg)); 350 ++I; 351 } 352 Args.append( 353 std::next(CD->param_begin(), CD->getContextParamPosition() + 1), 354 CD->param_end()); 355 TargetArgs.append( 356 std::next(CD->param_begin(), CD->getContextParamPosition() + 1), 357 CD->param_end()); 358 359 // Create the function declaration. 360 const CGFunctionInfo &FuncInfo = 361 CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs); 362 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); 363 364 llvm::Function *F = 365 llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage, 366 FO.FunctionName, &CGM.getModule()); 367 CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); 368 if (CD->isNothrow()) 369 F->setDoesNotThrow(); 370 371 // Generate the function. 372 CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, 373 FO.S->getLocStart(), CD->getBody()->getLocStart()); 374 unsigned Cnt = CD->getContextParamPosition(); 375 I = FO.S->captures().begin(); 376 for (auto *FD : RD->fields()) { 377 // Do not map arguments if we emit function with non-original types. 378 Address LocalAddr(Address::invalid()); 379 if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) { 380 LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt], 381 TargetArgs[Cnt]); 382 } else { 383 LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]); 384 } 385 // If we are capturing a pointer by copy we don't need to do anything, just 386 // use the value that we get from the arguments. 387 if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { 388 const VarDecl *CurVD = I->getCapturedVar(); 389 // If the variable is a reference we need to materialize it here. 390 if (CurVD->getType()->isReferenceType()) { 391 Address RefAddr = CGF.CreateMemTemp( 392 CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref"); 393 CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, 394 /*Volatile=*/false, CurVD->getType()); 395 LocalAddr = RefAddr; 396 } 397 if (!FO.RegisterCastedArgsOnly) 398 LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}}); 399 ++Cnt; 400 ++I; 401 continue; 402 } 403 404 LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), 405 AlignmentSource::Decl); 406 if (FD->hasCapturedVLAType()) { 407 if (FO.UIntPtrCastRequired) { 408 ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(), 409 Args[Cnt]->getName(), 410 ArgLVal), 411 FD->getType(), AlignmentSource::Decl); 412 } 413 auto *ExprArg = 414 CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal(); 415 auto VAT = FD->getCapturedVLAType(); 416 VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}}); 417 } else if (I->capturesVariable()) { 418 auto *Var = I->getCapturedVar(); 419 QualType VarTy = Var->getType(); 420 Address ArgAddr = ArgLVal.getAddress(); 421 if (!VarTy->isReferenceType()) { 422 if (ArgLVal.getType()->isLValueReferenceType()) { 423 ArgAddr = CGF.EmitLoadOfReference(ArgLVal); 424 } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { 425 assert(ArgLVal.getType()->isPointerType()); 426 ArgAddr = CGF.EmitLoadOfPointer( 427 ArgAddr, ArgLVal.getType()->castAs<PointerType>()); 428 } 429 } 430 if (!FO.RegisterCastedArgsOnly) { 431 LocalAddrs.insert( 432 {Args[Cnt], 433 {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}}); 434 } 435 } else if (I->capturesVariableByCopy()) { 436 assert(!FD->getType()->isAnyPointerType() && 437 "Not expecting a captured pointer."); 438 auto *Var = I->getCapturedVar(); 439 QualType VarTy = Var->getType(); 440 LocalAddrs.insert( 441 {Args[Cnt], 442 {Var, 443 FO.UIntPtrCastRequired 444 ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(), 445 ArgLVal, VarTy->isReferenceType()) 446 : ArgLVal.getAddress()}}); 447 } else { 448 // If 'this' is captured, load it into CXXThisValue. 449 assert(I->capturesThis()); 450 CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()) 451 .getScalarVal(); 452 LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}}); 453 } 454 ++Cnt; 455 ++I; 456 } 457 458 return F; 459 } 460 461 llvm::Function * 462 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { 463 assert( 464 CapturedStmtInfo && 465 "CapturedStmtInfo should be set when generating the captured function"); 466 const CapturedDecl *CD = S.getCapturedDecl(); 467 // Build the argument list. 468 bool NeedWrapperFunction = 469 getDebugInfo() && 470 CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo; 471 FunctionArgList Args; 472 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; 473 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes; 474 SmallString<256> Buffer; 475 llvm::raw_svector_ostream Out(Buffer); 476 Out << CapturedStmtInfo->getHelperName(); 477 if (NeedWrapperFunction) 478 Out << "_debug__"; 479 FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, 480 Out.str()); 481 llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs, 482 VLASizes, CXXThisValue, FO); 483 for (const auto &LocalAddrPair : LocalAddrs) { 484 if (LocalAddrPair.second.first) { 485 setAddrOfLocalVar(LocalAddrPair.second.first, 486 LocalAddrPair.second.second); 487 } 488 } 489 for (const auto &VLASizePair : VLASizes) 490 VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second; 491 PGO.assignRegionCounters(GlobalDecl(CD), F); 492 CapturedStmtInfo->EmitBody(*this, CD->getBody()); 493 FinishFunction(CD->getBodyRBrace()); 494 if (!NeedWrapperFunction) 495 return F; 496 497 FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, 498 /*RegisterCastedArgsOnly=*/true, 499 CapturedStmtInfo->getHelperName()); 500 CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); 501 Args.clear(); 502 LocalAddrs.clear(); 503 VLASizes.clear(); 504 llvm::Function *WrapperF = 505 emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, 506 WrapperCGF.CXXThisValue, WrapperFO); 507 llvm::SmallVector<llvm::Value *, 4> CallArgs; 508 for (const auto *Arg : Args) { 509 llvm::Value *CallArg; 510 auto I = LocalAddrs.find(Arg); 511 if (I != LocalAddrs.end()) { 512 LValue LV = WrapperCGF.MakeAddrLValue( 513 I->second.second, 514 I->second.first ? I->second.first->getType() : Arg->getType(), 515 AlignmentSource::Decl); 516 CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation()); 517 } else { 518 auto EI = VLASizes.find(Arg); 519 if (EI != VLASizes.end()) 520 CallArg = EI->second.second; 521 else { 522 LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg), 523 Arg->getType(), 524 AlignmentSource::Decl); 525 CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation()); 526 } 527 } 528 CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType())); 529 } 530 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(), 531 F, CallArgs); 532 WrapperCGF.FinishFunction(); 533 return WrapperF; 534 } 535 536 //===----------------------------------------------------------------------===// 537 // OpenMP Directive Emission 538 //===----------------------------------------------------------------------===// 539 void CodeGenFunction::EmitOMPAggregateAssign( 540 Address DestAddr, Address SrcAddr, QualType OriginalType, 541 const llvm::function_ref<void(Address, Address)> &CopyGen) { 542 // Perform element-by-element initialization. 543 QualType ElementTy; 544 545 // Drill down to the base element type on both arrays. 546 auto ArrayTy = OriginalType->getAsArrayTypeUnsafe(); 547 auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr); 548 SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 549 550 auto SrcBegin = SrcAddr.getPointer(); 551 auto DestBegin = DestAddr.getPointer(); 552 // Cast from pointer to array type to pointer to single element. 553 auto DestEnd = Builder.CreateGEP(DestBegin, NumElements); 554 // The basic structure here is a while-do loop. 555 auto BodyBB = createBasicBlock("omp.arraycpy.body"); 556 auto DoneBB = createBasicBlock("omp.arraycpy.done"); 557 auto IsEmpty = 558 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); 559 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 560 561 // Enter the loop body, making that address the current address. 562 auto EntryBB = Builder.GetInsertBlock(); 563 EmitBlock(BodyBB); 564 565 CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy); 566 567 llvm::PHINode *SrcElementPHI = 568 Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 569 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 570 Address SrcElementCurrent = 571 Address(SrcElementPHI, 572 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 573 574 llvm::PHINode *DestElementPHI = 575 Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 576 DestElementPHI->addIncoming(DestBegin, EntryBB); 577 Address DestElementCurrent = 578 Address(DestElementPHI, 579 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 580 581 // Emit copy. 582 CopyGen(DestElementCurrent, SrcElementCurrent); 583 584 // Shift the address forward by one element. 585 auto DestElementNext = Builder.CreateConstGEP1_32( 586 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 587 auto SrcElementNext = Builder.CreateConstGEP1_32( 588 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 589 // Check whether we've reached the end. 590 auto Done = 591 Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 592 Builder.CreateCondBr(Done, DoneBB, BodyBB); 593 DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock()); 594 SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock()); 595 596 // Done. 597 EmitBlock(DoneBB, /*IsFinished=*/true); 598 } 599 600 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, 601 Address SrcAddr, const VarDecl *DestVD, 602 const VarDecl *SrcVD, const Expr *Copy) { 603 if (OriginalType->isArrayType()) { 604 auto *BO = dyn_cast<BinaryOperator>(Copy); 605 if (BO && BO->getOpcode() == BO_Assign) { 606 // Perform simple memcpy for simple copying. 607 EmitAggregateAssign(DestAddr, SrcAddr, OriginalType); 608 } else { 609 // For arrays with complex element types perform element by element 610 // copying. 611 EmitOMPAggregateAssign( 612 DestAddr, SrcAddr, OriginalType, 613 [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) { 614 // Working with the single array element, so have to remap 615 // destination and source variables to corresponding array 616 // elements. 617 CodeGenFunction::OMPPrivateScope Remap(*this); 618 Remap.addPrivate(DestVD, [DestElement]() -> Address { 619 return DestElement; 620 }); 621 Remap.addPrivate( 622 SrcVD, [SrcElement]() -> Address { return SrcElement; }); 623 (void)Remap.Privatize(); 624 EmitIgnoredExpr(Copy); 625 }); 626 } 627 } else { 628 // Remap pseudo source variable to private copy. 629 CodeGenFunction::OMPPrivateScope Remap(*this); 630 Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; }); 631 Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; }); 632 (void)Remap.Privatize(); 633 // Emit copying of the whole variable. 634 EmitIgnoredExpr(Copy); 635 } 636 } 637 638 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, 639 OMPPrivateScope &PrivateScope) { 640 if (!HaveInsertPoint()) 641 return false; 642 bool FirstprivateIsLastprivate = false; 643 llvm::DenseSet<const VarDecl *> Lastprivates; 644 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 645 for (const auto *D : C->varlists()) 646 Lastprivates.insert( 647 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); 648 } 649 llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; 650 CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt())); 651 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { 652 auto IRef = C->varlist_begin(); 653 auto InitsRef = C->inits().begin(); 654 for (auto IInit : C->private_copies()) { 655 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 656 bool ThisFirstprivateIsLastprivate = 657 Lastprivates.count(OrigVD->getCanonicalDecl()) > 0; 658 auto *CapFD = CapturesInfo.lookup(OrigVD); 659 auto *FD = CapturedStmtInfo->lookup(OrigVD); 660 if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) && 661 !FD->getType()->isReferenceType()) { 662 EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); 663 ++IRef; 664 ++InitsRef; 665 continue; 666 } 667 FirstprivateIsLastprivate = 668 FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate; 669 if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { 670 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 671 auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); 672 bool IsRegistered; 673 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 674 /*RefersToEnclosingVariableOrCapture=*/FD != nullptr, 675 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 676 Address OriginalAddr = EmitLValue(&DRE).getAddress(); 677 QualType Type = VD->getType(); 678 if (Type->isArrayType()) { 679 // Emit VarDecl with copy init for arrays. 680 // Get the address of the original variable captured in current 681 // captured region. 682 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 683 auto Emission = EmitAutoVarAlloca(*VD); 684 auto *Init = VD->getInit(); 685 if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) { 686 // Perform simple memcpy. 687 EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr, 688 Type); 689 } else { 690 EmitOMPAggregateAssign( 691 Emission.getAllocatedAddress(), OriginalAddr, Type, 692 [this, VDInit, Init](Address DestElement, 693 Address SrcElement) { 694 // Clean up any temporaries needed by the initialization. 695 RunCleanupsScope InitScope(*this); 696 // Emit initialization for single element. 697 setAddrOfLocalVar(VDInit, SrcElement); 698 EmitAnyExprToMem(Init, DestElement, 699 Init->getType().getQualifiers(), 700 /*IsInitializer*/ false); 701 LocalDeclMap.erase(VDInit); 702 }); 703 } 704 EmitAutoVarCleanups(Emission); 705 return Emission.getAllocatedAddress(); 706 }); 707 } else { 708 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 709 // Emit private VarDecl with copy init. 710 // Remap temp VDInit variable to the address of the original 711 // variable 712 // (for proper handling of captured global variables). 713 setAddrOfLocalVar(VDInit, OriginalAddr); 714 EmitDecl(*VD); 715 LocalDeclMap.erase(VDInit); 716 return GetAddrOfLocalVar(VD); 717 }); 718 } 719 assert(IsRegistered && 720 "firstprivate var already registered as private"); 721 // Silence the warning about unused variable. 722 (void)IsRegistered; 723 } 724 ++IRef; 725 ++InitsRef; 726 } 727 } 728 return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty(); 729 } 730 731 void CodeGenFunction::EmitOMPPrivateClause( 732 const OMPExecutableDirective &D, 733 CodeGenFunction::OMPPrivateScope &PrivateScope) { 734 if (!HaveInsertPoint()) 735 return; 736 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 737 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) { 738 auto IRef = C->varlist_begin(); 739 for (auto IInit : C->private_copies()) { 740 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 741 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 742 auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 743 bool IsRegistered = 744 PrivateScope.addPrivate(OrigVD, [&]() -> Address { 745 // Emit private VarDecl with copy init. 746 EmitDecl(*VD); 747 return GetAddrOfLocalVar(VD); 748 }); 749 assert(IsRegistered && "private var already registered as private"); 750 // Silence the warning about unused variable. 751 (void)IsRegistered; 752 } 753 ++IRef; 754 } 755 } 756 } 757 758 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { 759 if (!HaveInsertPoint()) 760 return false; 761 // threadprivate_var1 = master_threadprivate_var1; 762 // operator=(threadprivate_var2, master_threadprivate_var2); 763 // ... 764 // __kmpc_barrier(&loc, global_tid); 765 llvm::DenseSet<const VarDecl *> CopiedVars; 766 llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; 767 for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) { 768 auto IRef = C->varlist_begin(); 769 auto ISrcRef = C->source_exprs().begin(); 770 auto IDestRef = C->destination_exprs().begin(); 771 for (auto *AssignOp : C->assignment_ops()) { 772 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 773 QualType Type = VD->getType(); 774 if (CopiedVars.insert(VD->getCanonicalDecl()).second) { 775 // Get the address of the master variable. If we are emitting code with 776 // TLS support, the address is passed from the master as field in the 777 // captured declaration. 778 Address MasterAddr = Address::invalid(); 779 if (getLangOpts().OpenMPUseTLS && 780 getContext().getTargetInfo().isTLSSupported()) { 781 assert(CapturedStmtInfo->lookup(VD) && 782 "Copyin threadprivates should have been captured!"); 783 DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(), 784 VK_LValue, (*IRef)->getExprLoc()); 785 MasterAddr = EmitLValue(&DRE).getAddress(); 786 LocalDeclMap.erase(VD); 787 } else { 788 MasterAddr = 789 Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) 790 : CGM.GetAddrOfGlobal(VD), 791 getContext().getDeclAlign(VD)); 792 } 793 // Get the address of the threadprivate variable. 794 Address PrivateAddr = EmitLValue(*IRef).getAddress(); 795 if (CopiedVars.size() == 1) { 796 // At first check if current thread is a master thread. If it is, no 797 // need to copy data. 798 CopyBegin = createBasicBlock("copyin.not.master"); 799 CopyEnd = createBasicBlock("copyin.not.master.end"); 800 Builder.CreateCondBr( 801 Builder.CreateICmpNE( 802 Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy), 803 Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)), 804 CopyBegin, CopyEnd); 805 EmitBlock(CopyBegin); 806 } 807 auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 808 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 809 EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp); 810 } 811 ++IRef; 812 ++ISrcRef; 813 ++IDestRef; 814 } 815 } 816 if (CopyEnd) { 817 // Exit out of copying procedure for non-master thread. 818 EmitBlock(CopyEnd, /*IsFinished=*/true); 819 return true; 820 } 821 return false; 822 } 823 824 bool CodeGenFunction::EmitOMPLastprivateClauseInit( 825 const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { 826 if (!HaveInsertPoint()) 827 return false; 828 bool HasAtLeastOneLastprivate = false; 829 llvm::DenseSet<const VarDecl *> SIMDLCVs; 830 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 831 auto *LoopDirective = cast<OMPLoopDirective>(&D); 832 for (auto *C : LoopDirective->counters()) { 833 SIMDLCVs.insert( 834 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 835 } 836 } 837 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; 838 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 839 HasAtLeastOneLastprivate = true; 840 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) 841 break; 842 auto IRef = C->varlist_begin(); 843 auto IDestRef = C->destination_exprs().begin(); 844 for (auto *IInit : C->private_copies()) { 845 // Keep the address of the original variable for future update at the end 846 // of the loop. 847 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 848 // Taskloops do not require additional initialization, it is done in 849 // runtime support library. 850 if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { 851 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 852 PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address { 853 DeclRefExpr DRE( 854 const_cast<VarDecl *>(OrigVD), 855 /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( 856 OrigVD) != nullptr, 857 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 858 return EmitLValue(&DRE).getAddress(); 859 }); 860 // Check if the variable is also a firstprivate: in this case IInit is 861 // not generated. Initialization of this variable will happen in codegen 862 // for 'firstprivate' clause. 863 if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) { 864 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 865 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 866 // Emit private VarDecl with copy init. 867 EmitDecl(*VD); 868 return GetAddrOfLocalVar(VD); 869 }); 870 assert(IsRegistered && 871 "lastprivate var already registered as private"); 872 (void)IsRegistered; 873 } 874 } 875 ++IRef; 876 ++IDestRef; 877 } 878 } 879 return HasAtLeastOneLastprivate; 880 } 881 882 void CodeGenFunction::EmitOMPLastprivateClauseFinal( 883 const OMPExecutableDirective &D, bool NoFinals, 884 llvm::Value *IsLastIterCond) { 885 if (!HaveInsertPoint()) 886 return; 887 // Emit following code: 888 // if (<IsLastIterCond>) { 889 // orig_var1 = private_orig_var1; 890 // ... 891 // orig_varn = private_orig_varn; 892 // } 893 llvm::BasicBlock *ThenBB = nullptr; 894 llvm::BasicBlock *DoneBB = nullptr; 895 if (IsLastIterCond) { 896 ThenBB = createBasicBlock(".omp.lastprivate.then"); 897 DoneBB = createBasicBlock(".omp.lastprivate.done"); 898 Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB); 899 EmitBlock(ThenBB); 900 } 901 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; 902 llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates; 903 if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) { 904 auto IC = LoopDirective->counters().begin(); 905 for (auto F : LoopDirective->finals()) { 906 auto *D = 907 cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl(); 908 if (NoFinals) 909 AlreadyEmittedVars.insert(D); 910 else 911 LoopCountersAndUpdates[D] = F; 912 ++IC; 913 } 914 } 915 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 916 auto IRef = C->varlist_begin(); 917 auto ISrcRef = C->source_exprs().begin(); 918 auto IDestRef = C->destination_exprs().begin(); 919 for (auto *AssignOp : C->assignment_ops()) { 920 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 921 QualType Type = PrivateVD->getType(); 922 auto *CanonicalVD = PrivateVD->getCanonicalDecl(); 923 if (AlreadyEmittedVars.insert(CanonicalVD).second) { 924 // If lastprivate variable is a loop control variable for loop-based 925 // directive, update its value before copyin back to original 926 // variable. 927 if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) 928 EmitIgnoredExpr(FinalExpr); 929 auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 930 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 931 // Get the address of the original variable. 932 Address OriginalAddr = GetAddrOfLocalVar(DestVD); 933 // Get the address of the private variable. 934 Address PrivateAddr = GetAddrOfLocalVar(PrivateVD); 935 if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>()) 936 PrivateAddr = 937 Address(Builder.CreateLoad(PrivateAddr), 938 getNaturalTypeAlignment(RefTy->getPointeeType())); 939 EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp); 940 } 941 ++IRef; 942 ++ISrcRef; 943 ++IDestRef; 944 } 945 if (auto *PostUpdate = C->getPostUpdateExpr()) 946 EmitIgnoredExpr(PostUpdate); 947 } 948 if (IsLastIterCond) 949 EmitBlock(DoneBB, /*IsFinished=*/true); 950 } 951 952 void CodeGenFunction::EmitOMPReductionClauseInit( 953 const OMPExecutableDirective &D, 954 CodeGenFunction::OMPPrivateScope &PrivateScope) { 955 if (!HaveInsertPoint()) 956 return; 957 SmallVector<const Expr *, 4> Shareds; 958 SmallVector<const Expr *, 4> Privates; 959 SmallVector<const Expr *, 4> ReductionOps; 960 SmallVector<const Expr *, 4> LHSs; 961 SmallVector<const Expr *, 4> RHSs; 962 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 963 auto IPriv = C->privates().begin(); 964 auto IRed = C->reduction_ops().begin(); 965 auto ILHS = C->lhs_exprs().begin(); 966 auto IRHS = C->rhs_exprs().begin(); 967 for (const auto *Ref : C->varlists()) { 968 Shareds.emplace_back(Ref); 969 Privates.emplace_back(*IPriv); 970 ReductionOps.emplace_back(*IRed); 971 LHSs.emplace_back(*ILHS); 972 RHSs.emplace_back(*IRHS); 973 std::advance(IPriv, 1); 974 std::advance(IRed, 1); 975 std::advance(ILHS, 1); 976 std::advance(IRHS, 1); 977 } 978 } 979 ReductionCodeGen RedCG(Shareds, Privates, ReductionOps); 980 unsigned Count = 0; 981 auto ILHS = LHSs.begin(); 982 auto IRHS = RHSs.begin(); 983 auto IPriv = Privates.begin(); 984 for (const auto *IRef : Shareds) { 985 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); 986 // Emit private VarDecl with reduction init. 987 RedCG.emitSharedLValue(*this, Count); 988 RedCG.emitAggregateType(*this, Count); 989 auto Emission = EmitAutoVarAlloca(*PrivateVD); 990 RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), 991 RedCG.getSharedLValue(Count), 992 [&Emission](CodeGenFunction &CGF) { 993 CGF.EmitAutoVarInit(Emission); 994 return true; 995 }); 996 EmitAutoVarCleanups(Emission); 997 Address BaseAddr = RedCG.adjustPrivateAddress( 998 *this, Count, Emission.getAllocatedAddress()); 999 bool IsRegistered = PrivateScope.addPrivate( 1000 RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; }); 1001 assert(IsRegistered && "private var already registered as private"); 1002 // Silence the warning about unused variable. 1003 (void)IsRegistered; 1004 1005 auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 1006 auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 1007 QualType Type = PrivateVD->getType(); 1008 bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef); 1009 if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) { 1010 // Store the address of the original variable associated with the LHS 1011 // implicit variable. 1012 PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { 1013 return RedCG.getSharedLValue(Count).getAddress(); 1014 }); 1015 PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { 1016 return GetAddrOfLocalVar(PrivateVD); 1017 }); 1018 } else if ((isaOMPArraySectionExpr && Type->isScalarType()) || 1019 isa<ArraySubscriptExpr>(IRef)) { 1020 // Store the address of the original variable associated with the LHS 1021 // implicit variable. 1022 PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { 1023 return RedCG.getSharedLValue(Count).getAddress(); 1024 }); 1025 PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { 1026 return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD), 1027 ConvertTypeForMem(RHSVD->getType()), 1028 "rhs.begin"); 1029 }); 1030 } else { 1031 QualType Type = PrivateVD->getType(); 1032 bool IsArray = getContext().getAsArrayType(Type) != nullptr; 1033 Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(); 1034 // Store the address of the original variable associated with the LHS 1035 // implicit variable. 1036 if (IsArray) { 1037 OriginalAddr = Builder.CreateElementBitCast( 1038 OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); 1039 } 1040 PrivateScope.addPrivate( 1041 LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; }); 1042 PrivateScope.addPrivate( 1043 RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address { 1044 return IsArray 1045 ? Builder.CreateElementBitCast( 1046 GetAddrOfLocalVar(PrivateVD), 1047 ConvertTypeForMem(RHSVD->getType()), "rhs.begin") 1048 : GetAddrOfLocalVar(PrivateVD); 1049 }); 1050 } 1051 ++ILHS; 1052 ++IRHS; 1053 ++IPriv; 1054 ++Count; 1055 } 1056 } 1057 1058 void CodeGenFunction::EmitOMPReductionClauseFinal( 1059 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) { 1060 if (!HaveInsertPoint()) 1061 return; 1062 llvm::SmallVector<const Expr *, 8> Privates; 1063 llvm::SmallVector<const Expr *, 8> LHSExprs; 1064 llvm::SmallVector<const Expr *, 8> RHSExprs; 1065 llvm::SmallVector<const Expr *, 8> ReductionOps; 1066 bool HasAtLeastOneReduction = false; 1067 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1068 HasAtLeastOneReduction = true; 1069 Privates.append(C->privates().begin(), C->privates().end()); 1070 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 1071 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 1072 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 1073 } 1074 if (HasAtLeastOneReduction) { 1075 bool WithNowait = D.getSingleClause<OMPNowaitClause>() || 1076 isOpenMPParallelDirective(D.getDirectiveKind()) || 1077 D.getDirectiveKind() == OMPD_simd; 1078 bool SimpleReduction = D.getDirectiveKind() == OMPD_simd; 1079 // Emit nowait reduction if nowait clause is present or directive is a 1080 // parallel directive (it always has implicit barrier). 1081 CGM.getOpenMPRuntime().emitReduction( 1082 *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps, 1083 {WithNowait, SimpleReduction, ReductionKind}); 1084 } 1085 } 1086 1087 static void emitPostUpdateForReductionClause( 1088 CodeGenFunction &CGF, const OMPExecutableDirective &D, 1089 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1090 if (!CGF.HaveInsertPoint()) 1091 return; 1092 llvm::BasicBlock *DoneBB = nullptr; 1093 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1094 if (auto *PostUpdate = C->getPostUpdateExpr()) { 1095 if (!DoneBB) { 1096 if (auto *Cond = CondGen(CGF)) { 1097 // If the first post-update expression is found, emit conditional 1098 // block if it was requested. 1099 auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu"); 1100 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done"); 1101 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1102 CGF.EmitBlock(ThenBB); 1103 } 1104 } 1105 CGF.EmitIgnoredExpr(PostUpdate); 1106 } 1107 } 1108 if (DoneBB) 1109 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 1110 } 1111 1112 namespace { 1113 /// Codegen lambda for appending distribute lower and upper bounds to outlined 1114 /// parallel function. This is necessary for combined constructs such as 1115 /// 'distribute parallel for' 1116 typedef llvm::function_ref<void(CodeGenFunction &, 1117 const OMPExecutableDirective &, 1118 llvm::SmallVectorImpl<llvm::Value *> &)> 1119 CodeGenBoundParametersTy; 1120 } // anonymous namespace 1121 1122 static void emitCommonOMPParallelDirective( 1123 CodeGenFunction &CGF, const OMPExecutableDirective &S, 1124 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1125 const CodeGenBoundParametersTy &CodeGenBoundParameters) { 1126 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); 1127 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( 1128 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 1129 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { 1130 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 1131 auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 1132 /*IgnoreResultAssign*/ true); 1133 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( 1134 CGF, NumThreads, NumThreadsClause->getLocStart()); 1135 } 1136 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { 1137 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); 1138 CGF.CGM.getOpenMPRuntime().emitProcBindClause( 1139 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart()); 1140 } 1141 const Expr *IfCond = nullptr; 1142 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 1143 if (C->getNameModifier() == OMPD_unknown || 1144 C->getNameModifier() == OMPD_parallel) { 1145 IfCond = C->getCondition(); 1146 break; 1147 } 1148 } 1149 1150 OMPParallelScope Scope(CGF, S); 1151 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 1152 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk 1153 // lower and upper bounds with the pragma 'for' chunking mechanism. 1154 // The following lambda takes care of appending the lower and upper bound 1155 // parameters when necessary 1156 CodeGenBoundParameters(CGF, S, CapturedVars); 1157 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 1158 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, 1159 CapturedVars, IfCond); 1160 } 1161 1162 static void emitEmptyBoundParameters(CodeGenFunction &, 1163 const OMPExecutableDirective &, 1164 llvm::SmallVectorImpl<llvm::Value *> &) {} 1165 1166 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { 1167 // Emit parallel region as a standalone region. 1168 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1169 OMPPrivateScope PrivateScope(CGF); 1170 bool Copyins = CGF.EmitOMPCopyinClause(S); 1171 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 1172 if (Copyins) { 1173 // Emit implicit barrier to synchronize threads and avoid data races on 1174 // propagation master's thread values of threadprivate variables to local 1175 // instances of that variables of all other implicit threads. 1176 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 1177 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 1178 /*ForceSimpleCall=*/true); 1179 } 1180 CGF.EmitOMPPrivateClause(S, PrivateScope); 1181 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 1182 (void)PrivateScope.Privatize(); 1183 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1184 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 1185 }; 1186 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, 1187 emitEmptyBoundParameters); 1188 emitPostUpdateForReductionClause( 1189 *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1190 } 1191 1192 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, 1193 JumpDest LoopExit) { 1194 RunCleanupsScope BodyScope(*this); 1195 // Update counters values on current iteration. 1196 for (auto I : D.updates()) { 1197 EmitIgnoredExpr(I); 1198 } 1199 // Update the linear variables. 1200 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1201 for (auto *U : C->updates()) 1202 EmitIgnoredExpr(U); 1203 } 1204 1205 // On a continue in the body, jump to the end. 1206 auto Continue = getJumpDestInCurrentScope("omp.body.continue"); 1207 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1208 // Emit loop body. 1209 EmitStmt(D.getBody()); 1210 // The end (updates/cleanups). 1211 EmitBlock(Continue.getBlock()); 1212 BreakContinueStack.pop_back(); 1213 } 1214 1215 void CodeGenFunction::EmitOMPInnerLoop( 1216 const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, 1217 const Expr *IncExpr, 1218 const llvm::function_ref<void(CodeGenFunction &)> &BodyGen, 1219 const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) { 1220 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); 1221 1222 // Start the loop with a block that tests the condition. 1223 auto CondBlock = createBasicBlock("omp.inner.for.cond"); 1224 EmitBlock(CondBlock); 1225 const SourceRange &R = S.getSourceRange(); 1226 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1227 SourceLocToDebugLoc(R.getEnd())); 1228 1229 // If there are any cleanups between here and the loop-exit scope, 1230 // create a block to stage a loop exit along. 1231 auto ExitBlock = LoopExit.getBlock(); 1232 if (RequiresCleanup) 1233 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); 1234 1235 auto LoopBody = createBasicBlock("omp.inner.for.body"); 1236 1237 // Emit condition. 1238 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); 1239 if (ExitBlock != LoopExit.getBlock()) { 1240 EmitBlock(ExitBlock); 1241 EmitBranchThroughCleanup(LoopExit); 1242 } 1243 1244 EmitBlock(LoopBody); 1245 incrementProfileCounter(&S); 1246 1247 // Create a block for the increment. 1248 auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); 1249 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1250 1251 BodyGen(*this); 1252 1253 // Emit "IV = IV + 1" and a back-edge to the condition block. 1254 EmitBlock(Continue.getBlock()); 1255 EmitIgnoredExpr(IncExpr); 1256 PostIncGen(*this); 1257 BreakContinueStack.pop_back(); 1258 EmitBranch(CondBlock); 1259 LoopStack.pop(); 1260 // Emit the fall-through block. 1261 EmitBlock(LoopExit.getBlock()); 1262 } 1263 1264 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { 1265 if (!HaveInsertPoint()) 1266 return false; 1267 // Emit inits for the linear variables. 1268 bool HasLinears = false; 1269 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1270 for (auto *Init : C->inits()) { 1271 HasLinears = true; 1272 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); 1273 if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { 1274 AutoVarEmission Emission = EmitAutoVarAlloca(*VD); 1275 auto *OrigVD = cast<VarDecl>(Ref->getDecl()); 1276 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 1277 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1278 VD->getInit()->getType(), VK_LValue, 1279 VD->getInit()->getExprLoc()); 1280 EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), 1281 VD->getType()), 1282 /*capturedByInit=*/false); 1283 EmitAutoVarCleanups(Emission); 1284 } else 1285 EmitVarDecl(*VD); 1286 } 1287 // Emit the linear steps for the linear clauses. 1288 // If a step is not constant, it is pre-calculated before the loop. 1289 if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep())) 1290 if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) { 1291 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); 1292 // Emit calculation of the linear step. 1293 EmitIgnoredExpr(CS); 1294 } 1295 } 1296 return HasLinears; 1297 } 1298 1299 void CodeGenFunction::EmitOMPLinearClauseFinal( 1300 const OMPLoopDirective &D, 1301 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1302 if (!HaveInsertPoint()) 1303 return; 1304 llvm::BasicBlock *DoneBB = nullptr; 1305 // Emit the final values of the linear variables. 1306 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1307 auto IC = C->varlist_begin(); 1308 for (auto *F : C->finals()) { 1309 if (!DoneBB) { 1310 if (auto *Cond = CondGen(*this)) { 1311 // If the first post-update expression is found, emit conditional 1312 // block if it was requested. 1313 auto *ThenBB = createBasicBlock(".omp.linear.pu"); 1314 DoneBB = createBasicBlock(".omp.linear.pu.done"); 1315 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1316 EmitBlock(ThenBB); 1317 } 1318 } 1319 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); 1320 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 1321 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1322 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); 1323 Address OrigAddr = EmitLValue(&DRE).getAddress(); 1324 CodeGenFunction::OMPPrivateScope VarScope(*this); 1325 VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; }); 1326 (void)VarScope.Privatize(); 1327 EmitIgnoredExpr(F); 1328 ++IC; 1329 } 1330 if (auto *PostUpdate = C->getPostUpdateExpr()) 1331 EmitIgnoredExpr(PostUpdate); 1332 } 1333 if (DoneBB) 1334 EmitBlock(DoneBB, /*IsFinished=*/true); 1335 } 1336 1337 static void emitAlignedClause(CodeGenFunction &CGF, 1338 const OMPExecutableDirective &D) { 1339 if (!CGF.HaveInsertPoint()) 1340 return; 1341 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { 1342 unsigned ClauseAlignment = 0; 1343 if (auto AlignmentExpr = Clause->getAlignment()) { 1344 auto AlignmentCI = 1345 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); 1346 ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue()); 1347 } 1348 for (auto E : Clause->varlists()) { 1349 unsigned Alignment = ClauseAlignment; 1350 if (Alignment == 0) { 1351 // OpenMP [2.8.1, Description] 1352 // If no optional parameter is specified, implementation-defined default 1353 // alignments for SIMD instructions on the target platforms are assumed. 1354 Alignment = 1355 CGF.getContext() 1356 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( 1357 E->getType()->getPointeeType())) 1358 .getQuantity(); 1359 } 1360 assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) && 1361 "alignment is not power of 2"); 1362 if (Alignment != 0) { 1363 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 1364 CGF.EmitAlignmentAssumption(PtrValue, Alignment); 1365 } 1366 } 1367 } 1368 } 1369 1370 void CodeGenFunction::EmitOMPPrivateLoopCounters( 1371 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { 1372 if (!HaveInsertPoint()) 1373 return; 1374 auto I = S.private_counters().begin(); 1375 for (auto *E : S.counters()) { 1376 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1377 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 1378 (void)LoopScope.addPrivate(VD, [&]() -> Address { 1379 // Emit var without initialization. 1380 if (!LocalDeclMap.count(PrivateVD)) { 1381 auto VarEmission = EmitAutoVarAlloca(*PrivateVD); 1382 EmitAutoVarCleanups(VarEmission); 1383 } 1384 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), 1385 /*RefersToEnclosingVariableOrCapture=*/false, 1386 (*I)->getType(), VK_LValue, (*I)->getExprLoc()); 1387 return EmitLValue(&DRE).getAddress(); 1388 }); 1389 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || 1390 VD->hasGlobalStorage()) { 1391 (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address { 1392 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 1393 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), 1394 E->getType(), VK_LValue, E->getExprLoc()); 1395 return EmitLValue(&DRE).getAddress(); 1396 }); 1397 } 1398 ++I; 1399 } 1400 } 1401 1402 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 1403 const Expr *Cond, llvm::BasicBlock *TrueBlock, 1404 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 1405 if (!CGF.HaveInsertPoint()) 1406 return; 1407 { 1408 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 1409 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); 1410 (void)PreCondScope.Privatize(); 1411 // Get initial values of real counters. 1412 for (auto I : S.inits()) { 1413 CGF.EmitIgnoredExpr(I); 1414 } 1415 } 1416 // Check that loop is executed at least one time. 1417 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); 1418 } 1419 1420 void CodeGenFunction::EmitOMPLinearClause( 1421 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { 1422 if (!HaveInsertPoint()) 1423 return; 1424 llvm::DenseSet<const VarDecl *> SIMDLCVs; 1425 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 1426 auto *LoopDirective = cast<OMPLoopDirective>(&D); 1427 for (auto *C : LoopDirective->counters()) { 1428 SIMDLCVs.insert( 1429 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 1430 } 1431 } 1432 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1433 auto CurPrivate = C->privates().begin(); 1434 for (auto *E : C->varlists()) { 1435 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1436 auto *PrivateVD = 1437 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); 1438 if (!SIMDLCVs.count(VD->getCanonicalDecl())) { 1439 bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address { 1440 // Emit private VarDecl with copy init. 1441 EmitVarDecl(*PrivateVD); 1442 return GetAddrOfLocalVar(PrivateVD); 1443 }); 1444 assert(IsRegistered && "linear var already registered as private"); 1445 // Silence the warning about unused variable. 1446 (void)IsRegistered; 1447 } else 1448 EmitVarDecl(*PrivateVD); 1449 ++CurPrivate; 1450 } 1451 } 1452 } 1453 1454 static void emitSimdlenSafelenClause(CodeGenFunction &CGF, 1455 const OMPExecutableDirective &D, 1456 bool IsMonotonic) { 1457 if (!CGF.HaveInsertPoint()) 1458 return; 1459 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { 1460 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 1461 /*ignoreResult=*/true); 1462 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1463 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1464 // In presence of finite 'safelen', it may be unsafe to mark all 1465 // the memory instructions parallel, because loop-carried 1466 // dependences of 'safelen' iterations are possible. 1467 if (!IsMonotonic) 1468 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); 1469 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { 1470 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), 1471 /*ignoreResult=*/true); 1472 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1473 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1474 // In presence of finite 'safelen', it may be unsafe to mark all 1475 // the memory instructions parallel, because loop-carried 1476 // dependences of 'safelen' iterations are possible. 1477 CGF.LoopStack.setParallel(false); 1478 } 1479 } 1480 1481 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, 1482 bool IsMonotonic) { 1483 // Walk clauses and process safelen/lastprivate. 1484 LoopStack.setParallel(!IsMonotonic); 1485 LoopStack.setVectorizeEnable(true); 1486 emitSimdlenSafelenClause(*this, D, IsMonotonic); 1487 } 1488 1489 void CodeGenFunction::EmitOMPSimdFinal( 1490 const OMPLoopDirective &D, 1491 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1492 if (!HaveInsertPoint()) 1493 return; 1494 llvm::BasicBlock *DoneBB = nullptr; 1495 auto IC = D.counters().begin(); 1496 auto IPC = D.private_counters().begin(); 1497 for (auto F : D.finals()) { 1498 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); 1499 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl()); 1500 auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD); 1501 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) || 1502 OrigVD->hasGlobalStorage() || CED) { 1503 if (!DoneBB) { 1504 if (auto *Cond = CondGen(*this)) { 1505 // If the first post-update expression is found, emit conditional 1506 // block if it was requested. 1507 auto *ThenBB = createBasicBlock(".omp.final.then"); 1508 DoneBB = createBasicBlock(".omp.final.done"); 1509 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1510 EmitBlock(ThenBB); 1511 } 1512 } 1513 Address OrigAddr = Address::invalid(); 1514 if (CED) 1515 OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); 1516 else { 1517 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), 1518 /*RefersToEnclosingVariableOrCapture=*/false, 1519 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); 1520 OrigAddr = EmitLValue(&DRE).getAddress(); 1521 } 1522 OMPPrivateScope VarScope(*this); 1523 VarScope.addPrivate(OrigVD, 1524 [OrigAddr]() -> Address { return OrigAddr; }); 1525 (void)VarScope.Privatize(); 1526 EmitIgnoredExpr(F); 1527 } 1528 ++IC; 1529 ++IPC; 1530 } 1531 if (DoneBB) 1532 EmitBlock(DoneBB, /*IsFinished=*/true); 1533 } 1534 1535 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, 1536 const OMPLoopDirective &S, 1537 CodeGenFunction::JumpDest LoopExit) { 1538 CGF.EmitOMPLoopBody(S, LoopExit); 1539 CGF.EmitStopPoint(&S); 1540 } 1541 1542 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, 1543 PrePostActionTy &Action) { 1544 Action.Enter(CGF); 1545 assert(isOpenMPSimdDirective(S.getDirectiveKind()) && 1546 "Expected simd directive"); 1547 OMPLoopScope PreInitScope(CGF, S); 1548 // if (PreCond) { 1549 // for (IV in 0..LastIteration) BODY; 1550 // <Final counter/linear vars updates>; 1551 // } 1552 // 1553 1554 // Emit: if (PreCond) - begin. 1555 // If the condition constant folds and can be elided, avoid emitting the 1556 // whole loop. 1557 bool CondConstant; 1558 llvm::BasicBlock *ContBlock = nullptr; 1559 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1560 if (!CondConstant) 1561 return; 1562 } else { 1563 auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); 1564 ContBlock = CGF.createBasicBlock("simd.if.end"); 1565 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 1566 CGF.getProfileCount(&S)); 1567 CGF.EmitBlock(ThenBlock); 1568 CGF.incrementProfileCounter(&S); 1569 } 1570 1571 // Emit the loop iteration variable. 1572 const Expr *IVExpr = S.getIterationVariable(); 1573 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 1574 CGF.EmitVarDecl(*IVDecl); 1575 CGF.EmitIgnoredExpr(S.getInit()); 1576 1577 // Emit the iterations count variable. 1578 // If it is not a variable, Sema decided to calculate iterations count on 1579 // each iteration (e.g., it is foldable into a constant). 1580 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1581 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1582 // Emit calculation of the iterations count. 1583 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 1584 } 1585 1586 CGF.EmitOMPSimdInit(S); 1587 1588 emitAlignedClause(CGF, S); 1589 (void)CGF.EmitOMPLinearClauseInit(S); 1590 { 1591 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 1592 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 1593 CGF.EmitOMPLinearClause(S, LoopScope); 1594 CGF.EmitOMPPrivateClause(S, LoopScope); 1595 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1596 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1597 (void)LoopScope.Privatize(); 1598 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1599 S.getInc(), 1600 [&S](CodeGenFunction &CGF) { 1601 CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); 1602 CGF.EmitStopPoint(&S); 1603 }, 1604 [](CodeGenFunction &) {}); 1605 CGF.EmitOMPSimdFinal( 1606 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1607 // Emit final copy of the lastprivate variables at the end of loops. 1608 if (HasLastprivateClause) 1609 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 1610 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); 1611 emitPostUpdateForReductionClause( 1612 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1613 } 1614 CGF.EmitOMPLinearClauseFinal( 1615 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1616 // Emit: if (PreCond) - end. 1617 if (ContBlock) { 1618 CGF.EmitBranch(ContBlock); 1619 CGF.EmitBlock(ContBlock, true); 1620 } 1621 } 1622 1623 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 1624 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 1625 emitOMPSimdRegion(CGF, S, Action); 1626 }; 1627 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1628 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1629 } 1630 1631 void CodeGenFunction::EmitOMPOuterLoop( 1632 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 1633 CodeGenFunction::OMPPrivateScope &LoopScope, 1634 const CodeGenFunction::OMPLoopArguments &LoopArgs, 1635 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 1636 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { 1637 auto &RT = CGM.getOpenMPRuntime(); 1638 1639 const Expr *IVExpr = S.getIterationVariable(); 1640 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1641 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1642 1643 auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 1644 1645 // Start the loop with a block that tests the condition. 1646 auto CondBlock = createBasicBlock("omp.dispatch.cond"); 1647 EmitBlock(CondBlock); 1648 const SourceRange &R = S.getSourceRange(); 1649 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1650 SourceLocToDebugLoc(R.getEnd())); 1651 1652 llvm::Value *BoolCondVal = nullptr; 1653 if (!DynamicOrOrdered) { 1654 // UB = min(UB, GlobalUB) or 1655 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 1656 // 'distribute parallel for') 1657 EmitIgnoredExpr(LoopArgs.EUB); 1658 // IV = LB 1659 EmitIgnoredExpr(LoopArgs.Init); 1660 // IV < UB 1661 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); 1662 } else { 1663 BoolCondVal = 1664 RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL, 1665 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); 1666 } 1667 1668 // If there are any cleanups between here and the loop-exit scope, 1669 // create a block to stage a loop exit along. 1670 auto ExitBlock = LoopExit.getBlock(); 1671 if (LoopScope.requiresCleanups()) 1672 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 1673 1674 auto LoopBody = createBasicBlock("omp.dispatch.body"); 1675 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 1676 if (ExitBlock != LoopExit.getBlock()) { 1677 EmitBlock(ExitBlock); 1678 EmitBranchThroughCleanup(LoopExit); 1679 } 1680 EmitBlock(LoopBody); 1681 1682 // Emit "IV = LB" (in case of static schedule, we have already calculated new 1683 // LB for loop condition and emitted it above). 1684 if (DynamicOrOrdered) 1685 EmitIgnoredExpr(LoopArgs.Init); 1686 1687 // Create a block for the increment. 1688 auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 1689 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1690 1691 // Generate !llvm.loop.parallel metadata for loads and stores for loops 1692 // with dynamic/guided scheduling and without ordered clause. 1693 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 1694 LoopStack.setParallel(!IsMonotonic); 1695 else 1696 EmitOMPSimdInit(S, IsMonotonic); 1697 1698 SourceLocation Loc = S.getLocStart(); 1699 1700 // when 'distribute' is not combined with a 'for': 1701 // while (idx <= UB) { BODY; ++idx; } 1702 // when 'distribute' is combined with a 'for' 1703 // (e.g. 'distribute parallel for') 1704 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 1705 EmitOMPInnerLoop( 1706 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, 1707 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 1708 CodeGenLoop(CGF, S, LoopExit); 1709 }, 1710 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { 1711 CodeGenOrdered(CGF, Loc, IVSize, IVSigned); 1712 }); 1713 1714 EmitBlock(Continue.getBlock()); 1715 BreakContinueStack.pop_back(); 1716 if (!DynamicOrOrdered) { 1717 // Emit "LB = LB + Stride", "UB = UB + Stride". 1718 EmitIgnoredExpr(LoopArgs.NextLB); 1719 EmitIgnoredExpr(LoopArgs.NextUB); 1720 } 1721 1722 EmitBranch(CondBlock); 1723 LoopStack.pop(); 1724 // Emit the fall-through block. 1725 EmitBlock(LoopExit.getBlock()); 1726 1727 // Tell the runtime we are done. 1728 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { 1729 if (!DynamicOrOrdered) 1730 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), 1731 S.getDirectiveKind()); 1732 }; 1733 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 1734 } 1735 1736 void CodeGenFunction::EmitOMPForOuterLoop( 1737 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 1738 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1739 const OMPLoopArguments &LoopArgs, 1740 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 1741 auto &RT = CGM.getOpenMPRuntime(); 1742 1743 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 1744 const bool DynamicOrOrdered = 1745 Ordered || RT.isDynamic(ScheduleKind.Schedule); 1746 1747 assert((Ordered || 1748 !RT.isStaticNonchunked(ScheduleKind.Schedule, 1749 LoopArgs.Chunk != nullptr)) && 1750 "static non-chunked schedule does not need outer loop"); 1751 1752 // Emit outer loop. 1753 // 1754 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1755 // When schedule(dynamic,chunk_size) is specified, the iterations are 1756 // distributed to threads in the team in chunks as the threads request them. 1757 // Each thread executes a chunk of iterations, then requests another chunk, 1758 // until no chunks remain to be distributed. Each chunk contains chunk_size 1759 // iterations, except for the last chunk to be distributed, which may have 1760 // fewer iterations. When no chunk_size is specified, it defaults to 1. 1761 // 1762 // When schedule(guided,chunk_size) is specified, the iterations are assigned 1763 // to threads in the team in chunks as the executing threads request them. 1764 // Each thread executes a chunk of iterations, then requests another chunk, 1765 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 1766 // each chunk is proportional to the number of unassigned iterations divided 1767 // by the number of threads in the team, decreasing to 1. For a chunk_size 1768 // with value k (greater than 1), the size of each chunk is determined in the 1769 // same way, with the restriction that the chunks do not contain fewer than k 1770 // iterations (except for the last chunk to be assigned, which may have fewer 1771 // than k iterations). 1772 // 1773 // When schedule(auto) is specified, the decision regarding scheduling is 1774 // delegated to the compiler and/or runtime system. The programmer gives the 1775 // implementation the freedom to choose any possible mapping of iterations to 1776 // threads in the team. 1777 // 1778 // When schedule(runtime) is specified, the decision regarding scheduling is 1779 // deferred until run time, and the schedule and chunk size are taken from the 1780 // run-sched-var ICV. If the ICV is set to auto, the schedule is 1781 // implementation defined 1782 // 1783 // while(__kmpc_dispatch_next(&LB, &UB)) { 1784 // idx = LB; 1785 // while (idx <= UB) { BODY; ++idx; 1786 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. 1787 // } // inner loop 1788 // } 1789 // 1790 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1791 // When schedule(static, chunk_size) is specified, iterations are divided into 1792 // chunks of size chunk_size, and the chunks are assigned to the threads in 1793 // the team in a round-robin fashion in the order of the thread number. 1794 // 1795 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { 1796 // while (idx <= UB) { BODY; ++idx; } // inner loop 1797 // LB = LB + ST; 1798 // UB = UB + ST; 1799 // } 1800 // 1801 1802 const Expr *IVExpr = S.getIterationVariable(); 1803 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1804 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1805 1806 if (DynamicOrOrdered) { 1807 auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); 1808 llvm::Value *LBVal = DispatchBounds.first; 1809 llvm::Value *UBVal = DispatchBounds.second; 1810 CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal, 1811 LoopArgs.Chunk}; 1812 RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize, 1813 IVSigned, Ordered, DipatchRTInputValues); 1814 } else { 1815 CGOpenMPRuntime::StaticRTInput StaticInit( 1816 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, 1817 LoopArgs.ST, LoopArgs.Chunk); 1818 RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(), 1819 ScheduleKind, StaticInit); 1820 } 1821 1822 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, 1823 const unsigned IVSize, 1824 const bool IVSigned) { 1825 if (Ordered) { 1826 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, 1827 IVSigned); 1828 } 1829 }; 1830 1831 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, 1832 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); 1833 OuterLoopArgs.IncExpr = S.getInc(); 1834 OuterLoopArgs.Init = S.getInit(); 1835 OuterLoopArgs.Cond = S.getCond(); 1836 OuterLoopArgs.NextLB = S.getNextLowerBound(); 1837 OuterLoopArgs.NextUB = S.getNextUpperBound(); 1838 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs, 1839 emitOMPLoopBodyWithStopPoint, CodeGenOrdered); 1840 } 1841 1842 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, 1843 const unsigned IVSize, const bool IVSigned) {} 1844 1845 void CodeGenFunction::EmitOMPDistributeOuterLoop( 1846 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, 1847 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, 1848 const CodeGenLoopTy &CodeGenLoopContent) { 1849 1850 auto &RT = CGM.getOpenMPRuntime(); 1851 1852 // Emit outer loop. 1853 // Same behavior as a OMPForOuterLoop, except that schedule cannot be 1854 // dynamic 1855 // 1856 1857 const Expr *IVExpr = S.getIterationVariable(); 1858 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1859 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1860 1861 CGOpenMPRuntime::StaticRTInput StaticInit( 1862 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB, 1863 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk); 1864 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit); 1865 1866 // for combined 'distribute' and 'for' the increment expression of distribute 1867 // is store in DistInc. For 'distribute' alone, it is in Inc. 1868 Expr *IncExpr; 1869 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) 1870 IncExpr = S.getDistInc(); 1871 else 1872 IncExpr = S.getInc(); 1873 1874 // this routine is shared by 'omp distribute parallel for' and 1875 // 'omp distribute': select the right EUB expression depending on the 1876 // directive 1877 OMPLoopArguments OuterLoopArgs; 1878 OuterLoopArgs.LB = LoopArgs.LB; 1879 OuterLoopArgs.UB = LoopArgs.UB; 1880 OuterLoopArgs.ST = LoopArgs.ST; 1881 OuterLoopArgs.IL = LoopArgs.IL; 1882 OuterLoopArgs.Chunk = LoopArgs.Chunk; 1883 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1884 ? S.getCombinedEnsureUpperBound() 1885 : S.getEnsureUpperBound(); 1886 OuterLoopArgs.IncExpr = IncExpr; 1887 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1888 ? S.getCombinedInit() 1889 : S.getInit(); 1890 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1891 ? S.getCombinedCond() 1892 : S.getCond(); 1893 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1894 ? S.getCombinedNextLowerBound() 1895 : S.getNextLowerBound(); 1896 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1897 ? S.getCombinedNextUpperBound() 1898 : S.getNextUpperBound(); 1899 1900 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, 1901 LoopScope, OuterLoopArgs, CodeGenLoopContent, 1902 emitEmptyOrdered); 1903 } 1904 1905 /// Emit a helper variable and return corresponding lvalue. 1906 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 1907 const DeclRefExpr *Helper) { 1908 auto VDecl = cast<VarDecl>(Helper->getDecl()); 1909 CGF.EmitVarDecl(*VDecl); 1910 return CGF.EmitLValue(Helper); 1911 } 1912 1913 static std::pair<LValue, LValue> 1914 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, 1915 const OMPExecutableDirective &S) { 1916 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 1917 LValue LB = 1918 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 1919 LValue UB = 1920 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 1921 1922 // When composing 'distribute' with 'for' (e.g. as in 'distribute 1923 // parallel for') we need to use the 'distribute' 1924 // chunk lower and upper bounds rather than the whole loop iteration 1925 // space. These are parameters to the outlined function for 'parallel' 1926 // and we copy the bounds of the previous schedule into the 1927 // the current ones. 1928 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable()); 1929 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable()); 1930 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation()); 1931 PrevLBVal = CGF.EmitScalarConversion( 1932 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(), 1933 LS.getIterationVariable()->getType(), SourceLocation()); 1934 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation()); 1935 PrevUBVal = CGF.EmitScalarConversion( 1936 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(), 1937 LS.getIterationVariable()->getType(), SourceLocation()); 1938 1939 CGF.EmitStoreOfScalar(PrevLBVal, LB); 1940 CGF.EmitStoreOfScalar(PrevUBVal, UB); 1941 1942 return {LB, UB}; 1943 } 1944 1945 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then 1946 /// we need to use the LB and UB expressions generated by the worksharing 1947 /// code generation support, whereas in non combined situations we would 1948 /// just emit 0 and the LastIteration expression 1949 /// This function is necessary due to the difference of the LB and UB 1950 /// types for the RT emission routines for 'for_static_init' and 1951 /// 'for_dispatch_init' 1952 static std::pair<llvm::Value *, llvm::Value *> 1953 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, 1954 const OMPExecutableDirective &S, 1955 Address LB, Address UB) { 1956 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 1957 const Expr *IVExpr = LS.getIterationVariable(); 1958 // when implementing a dynamic schedule for a 'for' combined with a 1959 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop 1960 // is not normalized as each team only executes its own assigned 1961 // distribute chunk 1962 QualType IteratorTy = IVExpr->getType(); 1963 llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, 1964 SourceLocation()); 1965 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, 1966 SourceLocation()); 1967 return {LBVal, UBVal}; 1968 } 1969 1970 static void emitDistributeParallelForDistributeInnerBoundParams( 1971 CodeGenFunction &CGF, const OMPExecutableDirective &S, 1972 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { 1973 const auto &Dir = cast<OMPLoopDirective>(S); 1974 LValue LB = 1975 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); 1976 auto LBCast = CGF.Builder.CreateIntCast( 1977 CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false); 1978 CapturedVars.push_back(LBCast); 1979 LValue UB = 1980 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); 1981 1982 auto UBCast = CGF.Builder.CreateIntCast( 1983 CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false); 1984 CapturedVars.push_back(UBCast); 1985 } 1986 1987 static void 1988 emitInnerParallelForWhenCombined(CodeGenFunction &CGF, 1989 const OMPLoopDirective &S, 1990 CodeGenFunction::JumpDest LoopExit) { 1991 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, 1992 PrePostActionTy &) { 1993 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), 1994 emitDistributeParallelForInnerBounds, 1995 emitDistributeParallelForDispatchBounds); 1996 }; 1997 1998 emitCommonOMPParallelDirective( 1999 CGF, S, OMPD_for, CGInlinedWorksharingLoop, 2000 emitDistributeParallelForDistributeInnerBoundParams); 2001 } 2002 2003 void CodeGenFunction::EmitOMPDistributeParallelForDirective( 2004 const OMPDistributeParallelForDirective &S) { 2005 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2006 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 2007 S.getDistInc()); 2008 }; 2009 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2010 OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for, 2011 /*HasCancel=*/false); 2012 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 2013 /*HasCancel=*/false); 2014 } 2015 2016 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( 2017 const OMPDistributeParallelForSimdDirective &S) { 2018 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2019 CGM.getOpenMPRuntime().emitInlinedDirective( 2020 *this, OMPD_distribute_parallel_for_simd, 2021 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2022 OMPLoopScope PreInitScope(CGF, S); 2023 CGF.EmitStmt( 2024 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2025 }); 2026 } 2027 2028 void CodeGenFunction::EmitOMPDistributeSimdDirective( 2029 const OMPDistributeSimdDirective &S) { 2030 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2031 CGM.getOpenMPRuntime().emitInlinedDirective( 2032 *this, OMPD_distribute_simd, 2033 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2034 OMPLoopScope PreInitScope(CGF, S); 2035 CGF.EmitStmt( 2036 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2037 }); 2038 } 2039 2040 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 2041 CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) { 2042 // Emit SPMD target parallel for region as a standalone region. 2043 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2044 emitOMPSimdRegion(CGF, S, Action); 2045 }; 2046 llvm::Function *Fn; 2047 llvm::Constant *Addr; 2048 // Emit target region as a standalone region. 2049 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 2050 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 2051 assert(Fn && Addr && "Target device function emission failed."); 2052 } 2053 2054 void CodeGenFunction::EmitOMPTargetSimdDirective( 2055 const OMPTargetSimdDirective &S) { 2056 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2057 emitOMPSimdRegion(CGF, S, Action); 2058 }; 2059 emitCommonOMPTargetDirective(*this, S, CodeGen); 2060 } 2061 2062 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( 2063 const OMPTeamsDistributeSimdDirective &S) { 2064 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2065 CGM.getOpenMPRuntime().emitInlinedDirective( 2066 *this, OMPD_teams_distribute_simd, 2067 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2068 OMPLoopScope PreInitScope(CGF, S); 2069 CGF.EmitStmt( 2070 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2071 }); 2072 } 2073 2074 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( 2075 const OMPTeamsDistributeParallelForSimdDirective &S) { 2076 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2077 CGM.getOpenMPRuntime().emitInlinedDirective( 2078 *this, OMPD_teams_distribute_parallel_for_simd, 2079 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2080 OMPLoopScope PreInitScope(CGF, S); 2081 CGF.EmitStmt( 2082 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2083 }); 2084 } 2085 2086 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( 2087 const OMPTargetTeamsDistributeDirective &S) { 2088 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2089 CGM.getOpenMPRuntime().emitInlinedDirective( 2090 *this, OMPD_target_teams_distribute, 2091 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2092 CGF.EmitStmt( 2093 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2094 }); 2095 } 2096 2097 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( 2098 const OMPTargetTeamsDistributeParallelForDirective &S) { 2099 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2100 CGM.getOpenMPRuntime().emitInlinedDirective( 2101 *this, OMPD_target_teams_distribute_parallel_for, 2102 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2103 CGF.EmitStmt( 2104 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2105 }); 2106 } 2107 2108 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( 2109 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 2110 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2111 CGM.getOpenMPRuntime().emitInlinedDirective( 2112 *this, OMPD_target_teams_distribute_parallel_for_simd, 2113 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2114 CGF.EmitStmt( 2115 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2116 }); 2117 } 2118 2119 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( 2120 const OMPTargetTeamsDistributeSimdDirective &S) { 2121 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2122 CGM.getOpenMPRuntime().emitInlinedDirective( 2123 *this, OMPD_target_teams_distribute_simd, 2124 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2125 CGF.EmitStmt( 2126 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2127 }); 2128 } 2129 2130 namespace { 2131 struct ScheduleKindModifiersTy { 2132 OpenMPScheduleClauseKind Kind; 2133 OpenMPScheduleClauseModifier M1; 2134 OpenMPScheduleClauseModifier M2; 2135 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, 2136 OpenMPScheduleClauseModifier M1, 2137 OpenMPScheduleClauseModifier M2) 2138 : Kind(Kind), M1(M1), M2(M2) {} 2139 }; 2140 } // namespace 2141 2142 bool CodeGenFunction::EmitOMPWorksharingLoop( 2143 const OMPLoopDirective &S, Expr *EUB, 2144 const CodeGenLoopBoundsTy &CodeGenLoopBounds, 2145 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 2146 // Emit the loop iteration variable. 2147 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2148 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2149 EmitVarDecl(*IVDecl); 2150 2151 // Emit the iterations count variable. 2152 // If it is not a variable, Sema decided to calculate iterations count on each 2153 // iteration (e.g., it is foldable into a constant). 2154 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2155 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2156 // Emit calculation of the iterations count. 2157 EmitIgnoredExpr(S.getCalcLastIteration()); 2158 } 2159 2160 auto &RT = CGM.getOpenMPRuntime(); 2161 2162 bool HasLastprivateClause; 2163 // Check pre-condition. 2164 { 2165 OMPLoopScope PreInitScope(*this, S); 2166 // Skip the entire loop if we don't meet the precondition. 2167 // If the condition constant folds and can be elided, avoid emitting the 2168 // whole loop. 2169 bool CondConstant; 2170 llvm::BasicBlock *ContBlock = nullptr; 2171 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2172 if (!CondConstant) 2173 return false; 2174 } else { 2175 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2176 ContBlock = createBasicBlock("omp.precond.end"); 2177 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2178 getProfileCount(&S)); 2179 EmitBlock(ThenBlock); 2180 incrementProfileCounter(&S); 2181 } 2182 2183 bool Ordered = false; 2184 if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 2185 if (OrderedClause->getNumForLoops()) 2186 RT.emitDoacrossInit(*this, S); 2187 else 2188 Ordered = true; 2189 } 2190 2191 llvm::DenseSet<const Expr *> EmittedFinals; 2192 emitAlignedClause(*this, S); 2193 bool HasLinears = EmitOMPLinearClauseInit(S); 2194 // Emit helper vars inits. 2195 2196 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); 2197 LValue LB = Bounds.first; 2198 LValue UB = Bounds.second; 2199 LValue ST = 2200 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2201 LValue IL = 2202 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2203 2204 // Emit 'then' code. 2205 { 2206 OMPPrivateScope LoopScope(*this); 2207 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) { 2208 // Emit implicit barrier to synchronize threads and avoid data races on 2209 // initialization of firstprivate variables and post-update of 2210 // lastprivate variables. 2211 CGM.getOpenMPRuntime().emitBarrierCall( 2212 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2213 /*ForceSimpleCall=*/true); 2214 } 2215 EmitOMPPrivateClause(S, LoopScope); 2216 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2217 EmitOMPReductionClauseInit(S, LoopScope); 2218 EmitOMPPrivateLoopCounters(S, LoopScope); 2219 EmitOMPLinearClause(S, LoopScope); 2220 (void)LoopScope.Privatize(); 2221 2222 // Detect the loop schedule kind and chunk. 2223 llvm::Value *Chunk = nullptr; 2224 OpenMPScheduleTy ScheduleKind; 2225 if (auto *C = S.getSingleClause<OMPScheduleClause>()) { 2226 ScheduleKind.Schedule = C->getScheduleKind(); 2227 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2228 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2229 if (const auto *Ch = C->getChunkSize()) { 2230 Chunk = EmitScalarExpr(Ch); 2231 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2232 S.getIterationVariable()->getType(), 2233 S.getLocStart()); 2234 } 2235 } 2236 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2237 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2238 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2239 // If the static schedule kind is specified or if the ordered clause is 2240 // specified, and if no monotonic modifier is specified, the effect will 2241 // be as if the monotonic modifier was specified. 2242 if (RT.isStaticNonchunked(ScheduleKind.Schedule, 2243 /* Chunked */ Chunk != nullptr) && 2244 !Ordered) { 2245 if (isOpenMPSimdDirective(S.getDirectiveKind())) 2246 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2247 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2248 // When no chunk_size is specified, the iteration space is divided into 2249 // chunks that are approximately equal in size, and at most one chunk is 2250 // distributed to each thread. Note that the size of the chunks is 2251 // unspecified in this case. 2252 CGOpenMPRuntime::StaticRTInput StaticInit( 2253 IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(), 2254 UB.getAddress(), ST.getAddress()); 2255 RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(), 2256 ScheduleKind, StaticInit); 2257 auto LoopExit = 2258 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2259 // UB = min(UB, GlobalUB); 2260 EmitIgnoredExpr(S.getEnsureUpperBound()); 2261 // IV = LB; 2262 EmitIgnoredExpr(S.getInit()); 2263 // while (idx <= UB) { BODY; ++idx; } 2264 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 2265 S.getInc(), 2266 [&S, LoopExit](CodeGenFunction &CGF) { 2267 CGF.EmitOMPLoopBody(S, LoopExit); 2268 CGF.EmitStopPoint(&S); 2269 }, 2270 [](CodeGenFunction &) {}); 2271 EmitBlock(LoopExit.getBlock()); 2272 // Tell the runtime we are done. 2273 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2274 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), 2275 S.getDirectiveKind()); 2276 }; 2277 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2278 } else { 2279 const bool IsMonotonic = 2280 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || 2281 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || 2282 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 2283 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 2284 // Emit the outer loop, which requests its work chunk [LB..UB] from 2285 // runtime and runs the inner loop to process it. 2286 const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), 2287 ST.getAddress(), IL.getAddress(), 2288 Chunk, EUB); 2289 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 2290 LoopArguments, CGDispatchBounds); 2291 } 2292 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2293 EmitOMPSimdFinal(S, 2294 [&](CodeGenFunction &CGF) -> llvm::Value * { 2295 return CGF.Builder.CreateIsNotNull( 2296 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2297 }); 2298 } 2299 EmitOMPReductionClauseFinal( 2300 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 2301 ? /*Parallel and Simd*/ OMPD_parallel_for_simd 2302 : /*Parallel only*/ OMPD_parallel); 2303 // Emit post-update of the reduction variables if IsLastIter != 0. 2304 emitPostUpdateForReductionClause( 2305 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2306 return CGF.Builder.CreateIsNotNull( 2307 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2308 }); 2309 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2310 if (HasLastprivateClause) 2311 EmitOMPLastprivateClauseFinal( 2312 S, isOpenMPSimdDirective(S.getDirectiveKind()), 2313 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 2314 } 2315 EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2316 return CGF.Builder.CreateIsNotNull( 2317 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2318 }); 2319 // We're now done with the loop, so jump to the continuation block. 2320 if (ContBlock) { 2321 EmitBranch(ContBlock); 2322 EmitBlock(ContBlock, true); 2323 } 2324 } 2325 return HasLastprivateClause; 2326 } 2327 2328 /// The following two functions generate expressions for the loop lower 2329 /// and upper bounds in case of static and dynamic (dispatch) schedule 2330 /// of the associated 'for' or 'distribute' loop. 2331 static std::pair<LValue, LValue> 2332 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 2333 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2334 LValue LB = 2335 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2336 LValue UB = 2337 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2338 return {LB, UB}; 2339 } 2340 2341 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not 2342 /// consider the lower and upper bound expressions generated by the 2343 /// worksharing loop support, but we use 0 and the iteration space size as 2344 /// constants 2345 static std::pair<llvm::Value *, llvm::Value *> 2346 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, 2347 Address LB, Address UB) { 2348 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2349 const Expr *IVExpr = LS.getIterationVariable(); 2350 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); 2351 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); 2352 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); 2353 return {LBVal, UBVal}; 2354 } 2355 2356 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 2357 bool HasLastprivates = false; 2358 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2359 PrePostActionTy &) { 2360 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 2361 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2362 emitForLoopBounds, 2363 emitDispatchForLoopBounds); 2364 }; 2365 { 2366 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2367 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 2368 S.hasCancel()); 2369 } 2370 2371 // Emit an implicit barrier at the end. 2372 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2373 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2374 } 2375 } 2376 2377 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 2378 bool HasLastprivates = false; 2379 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2380 PrePostActionTy &) { 2381 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2382 emitForLoopBounds, 2383 emitDispatchForLoopBounds); 2384 }; 2385 { 2386 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2387 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2388 } 2389 2390 // Emit an implicit barrier at the end. 2391 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2392 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2393 } 2394 } 2395 2396 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 2397 const Twine &Name, 2398 llvm::Value *Init = nullptr) { 2399 auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 2400 if (Init) 2401 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); 2402 return LVal; 2403 } 2404 2405 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 2406 auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); 2407 auto *CS = dyn_cast<CompoundStmt>(Stmt); 2408 bool HasLastprivates = false; 2409 auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF, 2410 PrePostActionTy &) { 2411 auto &C = CGF.CGM.getContext(); 2412 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2413 // Emit helper vars inits. 2414 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 2415 CGF.Builder.getInt32(0)); 2416 auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1) 2417 : CGF.Builder.getInt32(0); 2418 LValue UB = 2419 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 2420 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 2421 CGF.Builder.getInt32(1)); 2422 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 2423 CGF.Builder.getInt32(0)); 2424 // Loop counter. 2425 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 2426 OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2427 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 2428 OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2429 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 2430 // Generate condition for loop. 2431 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 2432 OK_Ordinary, S.getLocStart(), FPOptions()); 2433 // Increment for loop counter. 2434 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 2435 S.getLocStart()); 2436 auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) { 2437 // Iterate through all sections and emit a switch construct: 2438 // switch (IV) { 2439 // case 0: 2440 // <SectionStmt[0]>; 2441 // break; 2442 // ... 2443 // case <NumSection> - 1: 2444 // <SectionStmt[<NumSection> - 1]>; 2445 // break; 2446 // } 2447 // .omp.sections.exit: 2448 auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 2449 auto *SwitchStmt = CGF.Builder.CreateSwitch( 2450 CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, 2451 CS == nullptr ? 1 : CS->size()); 2452 if (CS) { 2453 unsigned CaseNumber = 0; 2454 for (auto *SubStmt : CS->children()) { 2455 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2456 CGF.EmitBlock(CaseBB); 2457 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 2458 CGF.EmitStmt(SubStmt); 2459 CGF.EmitBranch(ExitBB); 2460 ++CaseNumber; 2461 } 2462 } else { 2463 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2464 CGF.EmitBlock(CaseBB); 2465 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 2466 CGF.EmitStmt(Stmt); 2467 CGF.EmitBranch(ExitBB); 2468 } 2469 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 2470 }; 2471 2472 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2473 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 2474 // Emit implicit barrier to synchronize threads and avoid data races on 2475 // initialization of firstprivate variables and post-update of lastprivate 2476 // variables. 2477 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 2478 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2479 /*ForceSimpleCall=*/true); 2480 } 2481 CGF.EmitOMPPrivateClause(S, LoopScope); 2482 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2483 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2484 (void)LoopScope.Privatize(); 2485 2486 // Emit static non-chunked loop. 2487 OpenMPScheduleTy ScheduleKind; 2488 ScheduleKind.Schedule = OMPC_SCHEDULE_static; 2489 CGOpenMPRuntime::StaticRTInput StaticInit( 2490 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), 2491 LB.getAddress(), UB.getAddress(), ST.getAddress()); 2492 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 2493 CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit); 2494 // UB = min(UB, GlobalUB); 2495 auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); 2496 auto *MinUBGlobalUB = CGF.Builder.CreateSelect( 2497 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 2498 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 2499 // IV = LB; 2500 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); 2501 // while (idx <= UB) { BODY; ++idx; } 2502 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, 2503 [](CodeGenFunction &) {}); 2504 // Tell the runtime we are done. 2505 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2506 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), 2507 S.getDirectiveKind()); 2508 }; 2509 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); 2510 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 2511 // Emit post-update of the reduction variables if IsLastIter != 0. 2512 emitPostUpdateForReductionClause( 2513 CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2514 return CGF.Builder.CreateIsNotNull( 2515 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2516 }); 2517 2518 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2519 if (HasLastprivates) 2520 CGF.EmitOMPLastprivateClauseFinal( 2521 S, /*NoFinals=*/false, 2522 CGF.Builder.CreateIsNotNull( 2523 CGF.EmitLoadOfScalar(IL, S.getLocStart()))); 2524 }; 2525 2526 bool HasCancel = false; 2527 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 2528 HasCancel = OSD->hasCancel(); 2529 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 2530 HasCancel = OPSD->hasCancel(); 2531 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); 2532 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 2533 HasCancel); 2534 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 2535 // clause. Otherwise the barrier will be generated by the codegen for the 2536 // directive. 2537 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 2538 // Emit implicit barrier to synchronize threads and avoid data races on 2539 // initialization of firstprivate variables. 2540 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2541 OMPD_unknown); 2542 } 2543 } 2544 2545 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 2546 { 2547 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2548 EmitSections(S); 2549 } 2550 // Emit an implicit barrier at the end. 2551 if (!S.getSingleClause<OMPNowaitClause>()) { 2552 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2553 OMPD_sections); 2554 } 2555 } 2556 2557 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 2558 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2559 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2560 }; 2561 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2562 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 2563 S.hasCancel()); 2564 } 2565 2566 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 2567 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 2568 llvm::SmallVector<const Expr *, 8> DestExprs; 2569 llvm::SmallVector<const Expr *, 8> SrcExprs; 2570 llvm::SmallVector<const Expr *, 8> AssignmentOps; 2571 // Check if there are any 'copyprivate' clauses associated with this 2572 // 'single' construct. 2573 // Build a list of copyprivate variables along with helper expressions 2574 // (<source>, <destination>, <destination>=<source> expressions) 2575 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 2576 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 2577 DestExprs.append(C->destination_exprs().begin(), 2578 C->destination_exprs().end()); 2579 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 2580 AssignmentOps.append(C->assignment_ops().begin(), 2581 C->assignment_ops().end()); 2582 } 2583 // Emit code for 'single' region along with 'copyprivate' clauses 2584 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2585 Action.Enter(CGF); 2586 OMPPrivateScope SingleScope(CGF); 2587 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 2588 CGF.EmitOMPPrivateClause(S, SingleScope); 2589 (void)SingleScope.Privatize(); 2590 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2591 }; 2592 { 2593 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2594 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 2595 CopyprivateVars, DestExprs, 2596 SrcExprs, AssignmentOps); 2597 } 2598 // Emit an implicit barrier at the end (to avoid data race on firstprivate 2599 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 2600 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 2601 CGM.getOpenMPRuntime().emitBarrierCall( 2602 *this, S.getLocStart(), 2603 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); 2604 } 2605 } 2606 2607 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 2608 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2609 Action.Enter(CGF); 2610 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2611 }; 2612 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2613 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 2614 } 2615 2616 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 2617 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2618 Action.Enter(CGF); 2619 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2620 }; 2621 Expr *Hint = nullptr; 2622 if (auto *HintClause = S.getSingleClause<OMPHintClause>()) 2623 Hint = HintClause->getHint(); 2624 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2625 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 2626 S.getDirectiveName().getAsString(), 2627 CodeGen, S.getLocStart(), Hint); 2628 } 2629 2630 void CodeGenFunction::EmitOMPParallelForDirective( 2631 const OMPParallelForDirective &S) { 2632 // Emit directive as a combined directive that consists of two implicit 2633 // directives: 'parallel' with 'for' directive. 2634 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2635 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); 2636 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2637 emitDispatchForLoopBounds); 2638 }; 2639 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, 2640 emitEmptyBoundParameters); 2641 } 2642 2643 void CodeGenFunction::EmitOMPParallelForSimdDirective( 2644 const OMPParallelForSimdDirective &S) { 2645 // Emit directive as a combined directive that consists of two implicit 2646 // directives: 'parallel' with 'for' directive. 2647 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2648 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2649 emitDispatchForLoopBounds); 2650 }; 2651 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, 2652 emitEmptyBoundParameters); 2653 } 2654 2655 void CodeGenFunction::EmitOMPParallelSectionsDirective( 2656 const OMPParallelSectionsDirective &S) { 2657 // Emit directive as a combined directive that consists of two implicit 2658 // directives: 'parallel' with 'sections' directive. 2659 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2660 CGF.EmitSections(S); 2661 }; 2662 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, 2663 emitEmptyBoundParameters); 2664 } 2665 2666 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, 2667 const RegionCodeGenTy &BodyGen, 2668 const TaskGenTy &TaskGen, 2669 OMPTaskDataTy &Data) { 2670 // Emit outlined function for task construct. 2671 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2672 auto *I = CS->getCapturedDecl()->param_begin(); 2673 auto *PartId = std::next(I); 2674 auto *TaskT = std::next(I, 4); 2675 // Check if the task is final 2676 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 2677 // If the condition constant folds and can be elided, try to avoid emitting 2678 // the condition and the dead arm of the if/else. 2679 auto *Cond = Clause->getCondition(); 2680 bool CondConstant; 2681 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 2682 Data.Final.setInt(CondConstant); 2683 else 2684 Data.Final.setPointer(EvaluateExprAsBool(Cond)); 2685 } else { 2686 // By default the task is not final. 2687 Data.Final.setInt(/*IntVal=*/false); 2688 } 2689 // Check if the task has 'priority' clause. 2690 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 2691 auto *Prio = Clause->getPriority(); 2692 Data.Priority.setInt(/*IntVal=*/true); 2693 Data.Priority.setPointer(EmitScalarConversion( 2694 EmitScalarExpr(Prio), Prio->getType(), 2695 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 2696 Prio->getExprLoc())); 2697 } 2698 // The first function argument for tasks is a thread id, the second one is a 2699 // part id (0 for tied tasks, >=0 for untied task). 2700 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 2701 // Get list of private variables. 2702 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 2703 auto IRef = C->varlist_begin(); 2704 for (auto *IInit : C->private_copies()) { 2705 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2706 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2707 Data.PrivateVars.push_back(*IRef); 2708 Data.PrivateCopies.push_back(IInit); 2709 } 2710 ++IRef; 2711 } 2712 } 2713 EmittedAsPrivate.clear(); 2714 // Get list of firstprivate variables. 2715 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 2716 auto IRef = C->varlist_begin(); 2717 auto IElemInitRef = C->inits().begin(); 2718 for (auto *IInit : C->private_copies()) { 2719 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2720 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2721 Data.FirstprivateVars.push_back(*IRef); 2722 Data.FirstprivateCopies.push_back(IInit); 2723 Data.FirstprivateInits.push_back(*IElemInitRef); 2724 } 2725 ++IRef; 2726 ++IElemInitRef; 2727 } 2728 } 2729 // Get list of lastprivate variables (for taskloops). 2730 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 2731 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 2732 auto IRef = C->varlist_begin(); 2733 auto ID = C->destination_exprs().begin(); 2734 for (auto *IInit : C->private_copies()) { 2735 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2736 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2737 Data.LastprivateVars.push_back(*IRef); 2738 Data.LastprivateCopies.push_back(IInit); 2739 } 2740 LastprivateDstsOrigs.insert( 2741 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 2742 cast<DeclRefExpr>(*IRef)}); 2743 ++IRef; 2744 ++ID; 2745 } 2746 } 2747 SmallVector<const Expr *, 4> LHSs; 2748 SmallVector<const Expr *, 4> RHSs; 2749 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 2750 auto IPriv = C->privates().begin(); 2751 auto IRed = C->reduction_ops().begin(); 2752 auto ILHS = C->lhs_exprs().begin(); 2753 auto IRHS = C->rhs_exprs().begin(); 2754 for (const auto *Ref : C->varlists()) { 2755 Data.ReductionVars.emplace_back(Ref); 2756 Data.ReductionCopies.emplace_back(*IPriv); 2757 Data.ReductionOps.emplace_back(*IRed); 2758 LHSs.emplace_back(*ILHS); 2759 RHSs.emplace_back(*IRHS); 2760 std::advance(IPriv, 1); 2761 std::advance(IRed, 1); 2762 std::advance(ILHS, 1); 2763 std::advance(IRHS, 1); 2764 } 2765 } 2766 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( 2767 *this, S.getLocStart(), LHSs, RHSs, Data); 2768 // Build list of dependences. 2769 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) 2770 for (auto *IRef : C->varlists()) 2771 Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); 2772 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs]( 2773 CodeGenFunction &CGF, PrePostActionTy &Action) { 2774 // Set proper addresses for generated private copies. 2775 OMPPrivateScope Scope(CGF); 2776 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || 2777 !Data.LastprivateVars.empty()) { 2778 enum { PrivatesParam = 2, CopyFnParam = 3 }; 2779 auto *CopyFn = CGF.Builder.CreateLoad( 2780 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); 2781 auto *PrivatesPtr = CGF.Builder.CreateLoad( 2782 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); 2783 // Map privates. 2784 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 2785 llvm::SmallVector<llvm::Value *, 16> CallArgs; 2786 CallArgs.push_back(PrivatesPtr); 2787 for (auto *E : Data.PrivateVars) { 2788 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2789 Address PrivatePtr = CGF.CreateMemTemp( 2790 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); 2791 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2792 CallArgs.push_back(PrivatePtr.getPointer()); 2793 } 2794 for (auto *E : Data.FirstprivateVars) { 2795 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2796 Address PrivatePtr = 2797 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 2798 ".firstpriv.ptr.addr"); 2799 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2800 CallArgs.push_back(PrivatePtr.getPointer()); 2801 } 2802 for (auto *E : Data.LastprivateVars) { 2803 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2804 Address PrivatePtr = 2805 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 2806 ".lastpriv.ptr.addr"); 2807 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2808 CallArgs.push_back(PrivatePtr.getPointer()); 2809 } 2810 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), 2811 CopyFn, CallArgs); 2812 for (auto &&Pair : LastprivateDstsOrigs) { 2813 auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); 2814 DeclRefExpr DRE( 2815 const_cast<VarDecl *>(OrigVD), 2816 /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup( 2817 OrigVD) != nullptr, 2818 Pair.second->getType(), VK_LValue, Pair.second->getExprLoc()); 2819 Scope.addPrivate(Pair.first, [&CGF, &DRE]() { 2820 return CGF.EmitLValue(&DRE).getAddress(); 2821 }); 2822 } 2823 for (auto &&Pair : PrivatePtrs) { 2824 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 2825 CGF.getContext().getDeclAlign(Pair.first)); 2826 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 2827 } 2828 } 2829 if (Data.Reductions) { 2830 OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true); 2831 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies, 2832 Data.ReductionOps); 2833 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( 2834 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9))); 2835 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { 2836 RedCG.emitSharedLValue(CGF, Cnt); 2837 RedCG.emitAggregateType(CGF, Cnt); 2838 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( 2839 CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); 2840 Replacement = 2841 Address(CGF.EmitScalarConversion( 2842 Replacement.getPointer(), CGF.getContext().VoidPtrTy, 2843 CGF.getContext().getPointerType( 2844 Data.ReductionCopies[Cnt]->getType()), 2845 SourceLocation()), 2846 Replacement.getAlignment()); 2847 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); 2848 Scope.addPrivate(RedCG.getBaseDecl(Cnt), 2849 [Replacement]() { return Replacement; }); 2850 // FIXME: This must removed once the runtime library is fixed. 2851 // Emit required threadprivate variables for 2852 // initilizer/combiner/finalizer. 2853 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), 2854 RedCG, Cnt); 2855 } 2856 } 2857 // Privatize all private variables except for in_reduction items. 2858 (void)Scope.Privatize(); 2859 SmallVector<const Expr *, 4> InRedVars; 2860 SmallVector<const Expr *, 4> InRedPrivs; 2861 SmallVector<const Expr *, 4> InRedOps; 2862 SmallVector<const Expr *, 4> TaskgroupDescriptors; 2863 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { 2864 auto IPriv = C->privates().begin(); 2865 auto IRed = C->reduction_ops().begin(); 2866 auto ITD = C->taskgroup_descriptors().begin(); 2867 for (const auto *Ref : C->varlists()) { 2868 InRedVars.emplace_back(Ref); 2869 InRedPrivs.emplace_back(*IPriv); 2870 InRedOps.emplace_back(*IRed); 2871 TaskgroupDescriptors.emplace_back(*ITD); 2872 std::advance(IPriv, 1); 2873 std::advance(IRed, 1); 2874 std::advance(ITD, 1); 2875 } 2876 } 2877 // Privatize in_reduction items here, because taskgroup descriptors must be 2878 // privatized earlier. 2879 OMPPrivateScope InRedScope(CGF); 2880 if (!InRedVars.empty()) { 2881 ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps); 2882 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { 2883 RedCG.emitSharedLValue(CGF, Cnt); 2884 RedCG.emitAggregateType(CGF, Cnt); 2885 // The taskgroup descriptor variable is always implicit firstprivate and 2886 // privatized already during procoessing of the firstprivates. 2887 llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar( 2888 CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation()); 2889 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( 2890 CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); 2891 Replacement = Address( 2892 CGF.EmitScalarConversion( 2893 Replacement.getPointer(), CGF.getContext().VoidPtrTy, 2894 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), 2895 SourceLocation()), 2896 Replacement.getAlignment()); 2897 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); 2898 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), 2899 [Replacement]() { return Replacement; }); 2900 // FIXME: This must removed once the runtime library is fixed. 2901 // Emit required threadprivate variables for 2902 // initilizer/combiner/finalizer. 2903 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), 2904 RedCG, Cnt); 2905 } 2906 } 2907 (void)InRedScope.Privatize(); 2908 2909 Action.Enter(CGF); 2910 BodyGen(CGF); 2911 }; 2912 auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 2913 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, 2914 Data.NumberOfParts); 2915 OMPLexicalScope Scope(*this, S); 2916 TaskGen(*this, OutlinedFn, Data); 2917 } 2918 2919 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 2920 // Emit outlined function for task construct. 2921 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2922 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 2923 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 2924 const Expr *IfCond = nullptr; 2925 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2926 if (C->getNameModifier() == OMPD_unknown || 2927 C->getNameModifier() == OMPD_task) { 2928 IfCond = C->getCondition(); 2929 break; 2930 } 2931 } 2932 2933 OMPTaskDataTy Data; 2934 // Check if we should emit tied or untied task. 2935 Data.Tied = !S.getSingleClause<OMPUntiedClause>(); 2936 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 2937 CGF.EmitStmt(CS->getCapturedStmt()); 2938 }; 2939 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 2940 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 2941 const OMPTaskDataTy &Data) { 2942 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn, 2943 SharedsTy, CapturedStruct, IfCond, 2944 Data); 2945 }; 2946 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 2947 } 2948 2949 void CodeGenFunction::EmitOMPTaskyieldDirective( 2950 const OMPTaskyieldDirective &S) { 2951 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); 2952 } 2953 2954 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 2955 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); 2956 } 2957 2958 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 2959 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart()); 2960 } 2961 2962 void CodeGenFunction::EmitOMPTaskgroupDirective( 2963 const OMPTaskgroupDirective &S) { 2964 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2965 Action.Enter(CGF); 2966 if (const Expr *E = S.getReductionRef()) { 2967 SmallVector<const Expr *, 4> LHSs; 2968 SmallVector<const Expr *, 4> RHSs; 2969 OMPTaskDataTy Data; 2970 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { 2971 auto IPriv = C->privates().begin(); 2972 auto IRed = C->reduction_ops().begin(); 2973 auto ILHS = C->lhs_exprs().begin(); 2974 auto IRHS = C->rhs_exprs().begin(); 2975 for (const auto *Ref : C->varlists()) { 2976 Data.ReductionVars.emplace_back(Ref); 2977 Data.ReductionCopies.emplace_back(*IPriv); 2978 Data.ReductionOps.emplace_back(*IRed); 2979 LHSs.emplace_back(*ILHS); 2980 RHSs.emplace_back(*IRHS); 2981 std::advance(IPriv, 1); 2982 std::advance(IRed, 1); 2983 std::advance(ILHS, 1); 2984 std::advance(IRHS, 1); 2985 } 2986 } 2987 llvm::Value *ReductionDesc = 2988 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(), 2989 LHSs, RHSs, Data); 2990 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2991 CGF.EmitVarDecl(*VD); 2992 CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD), 2993 /*Volatile=*/false, E->getType()); 2994 } 2995 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2996 }; 2997 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2998 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); 2999 } 3000 3001 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 3002 CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { 3003 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) { 3004 return llvm::makeArrayRef(FlushClause->varlist_begin(), 3005 FlushClause->varlist_end()); 3006 } 3007 return llvm::None; 3008 }(), S.getLocStart()); 3009 } 3010 3011 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, 3012 const CodeGenLoopTy &CodeGenLoop, 3013 Expr *IncExpr) { 3014 // Emit the loop iteration variable. 3015 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 3016 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 3017 EmitVarDecl(*IVDecl); 3018 3019 // Emit the iterations count variable. 3020 // If it is not a variable, Sema decided to calculate iterations count on each 3021 // iteration (e.g., it is foldable into a constant). 3022 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 3023 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 3024 // Emit calculation of the iterations count. 3025 EmitIgnoredExpr(S.getCalcLastIteration()); 3026 } 3027 3028 auto &RT = CGM.getOpenMPRuntime(); 3029 3030 bool HasLastprivateClause = false; 3031 // Check pre-condition. 3032 { 3033 OMPLoopScope PreInitScope(*this, S); 3034 // Skip the entire loop if we don't meet the precondition. 3035 // If the condition constant folds and can be elided, avoid emitting the 3036 // whole loop. 3037 bool CondConstant; 3038 llvm::BasicBlock *ContBlock = nullptr; 3039 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 3040 if (!CondConstant) 3041 return; 3042 } else { 3043 auto *ThenBlock = createBasicBlock("omp.precond.then"); 3044 ContBlock = createBasicBlock("omp.precond.end"); 3045 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 3046 getProfileCount(&S)); 3047 EmitBlock(ThenBlock); 3048 incrementProfileCounter(&S); 3049 } 3050 3051 // Emit 'then' code. 3052 { 3053 // Emit helper vars inits. 3054 3055 LValue LB = EmitOMPHelperVar( 3056 *this, cast<DeclRefExpr>( 3057 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3058 ? S.getCombinedLowerBoundVariable() 3059 : S.getLowerBoundVariable()))); 3060 LValue UB = EmitOMPHelperVar( 3061 *this, cast<DeclRefExpr>( 3062 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3063 ? S.getCombinedUpperBoundVariable() 3064 : S.getUpperBoundVariable()))); 3065 LValue ST = 3066 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 3067 LValue IL = 3068 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 3069 3070 OMPPrivateScope LoopScope(*this); 3071 if (EmitOMPFirstprivateClause(S, LoopScope)) { 3072 // Emit implicit barrier to synchronize threads and avoid data races on 3073 // initialization of firstprivate variables and post-update of 3074 // lastprivate variables. 3075 CGM.getOpenMPRuntime().emitBarrierCall( 3076 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 3077 /*ForceSimpleCall=*/true); 3078 } 3079 EmitOMPPrivateClause(S, LoopScope); 3080 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 3081 EmitOMPPrivateLoopCounters(S, LoopScope); 3082 (void)LoopScope.Privatize(); 3083 3084 // Detect the distribute schedule kind and chunk. 3085 llvm::Value *Chunk = nullptr; 3086 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; 3087 if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) { 3088 ScheduleKind = C->getDistScheduleKind(); 3089 if (const auto *Ch = C->getChunkSize()) { 3090 Chunk = EmitScalarExpr(Ch); 3091 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 3092 S.getIterationVariable()->getType(), 3093 S.getLocStart()); 3094 } 3095 } 3096 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 3097 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 3098 3099 // OpenMP [2.10.8, distribute Construct, Description] 3100 // If dist_schedule is specified, kind must be static. If specified, 3101 // iterations are divided into chunks of size chunk_size, chunks are 3102 // assigned to the teams of the league in a round-robin fashion in the 3103 // order of the team number. When no chunk_size is specified, the 3104 // iteration space is divided into chunks that are approximately equal 3105 // in size, and at most one chunk is distributed to each team of the 3106 // league. The size of the chunks is unspecified in this case. 3107 if (RT.isStaticNonchunked(ScheduleKind, 3108 /* Chunked */ Chunk != nullptr)) { 3109 CGOpenMPRuntime::StaticRTInput StaticInit( 3110 IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(), 3111 LB.getAddress(), UB.getAddress(), ST.getAddress()); 3112 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, 3113 StaticInit); 3114 auto LoopExit = 3115 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 3116 // UB = min(UB, GlobalUB); 3117 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3118 ? S.getCombinedEnsureUpperBound() 3119 : S.getEnsureUpperBound()); 3120 // IV = LB; 3121 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3122 ? S.getCombinedInit() 3123 : S.getInit()); 3124 3125 Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3126 ? S.getCombinedCond() 3127 : S.getCond(); 3128 3129 // for distribute alone, codegen 3130 // while (idx <= UB) { BODY; ++idx; } 3131 // when combined with 'for' (e.g. as in 'distribute parallel for') 3132 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 3133 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr, 3134 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 3135 CodeGenLoop(CGF, S, LoopExit); 3136 }, 3137 [](CodeGenFunction &) {}); 3138 EmitBlock(LoopExit.getBlock()); 3139 // Tell the runtime we are done. 3140 RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind()); 3141 } else { 3142 // Emit the outer loop, which requests its work chunk [LB..UB] from 3143 // runtime and runs the inner loop to process it. 3144 const OMPLoopArguments LoopArguments = { 3145 LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), 3146 Chunk}; 3147 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, 3148 CodeGenLoop); 3149 } 3150 3151 // Emit final copy of the lastprivate variables if IsLastIter != 0. 3152 if (HasLastprivateClause) 3153 EmitOMPLastprivateClauseFinal( 3154 S, /*NoFinals=*/false, 3155 Builder.CreateIsNotNull( 3156 EmitLoadOfScalar(IL, S.getLocStart()))); 3157 } 3158 3159 // We're now done with the loop, so jump to the continuation block. 3160 if (ContBlock) { 3161 EmitBranch(ContBlock); 3162 EmitBlock(ContBlock, true); 3163 } 3164 } 3165 } 3166 3167 void CodeGenFunction::EmitOMPDistributeDirective( 3168 const OMPDistributeDirective &S) { 3169 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3170 3171 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 3172 }; 3173 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3174 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 3175 false); 3176 } 3177 3178 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 3179 const CapturedStmt *S) { 3180 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 3181 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 3182 CGF.CapturedStmtInfo = &CapStmtInfo; 3183 auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); 3184 Fn->addFnAttr(llvm::Attribute::NoInline); 3185 return Fn; 3186 } 3187 3188 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 3189 if (!S.getAssociatedStmt()) { 3190 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) 3191 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); 3192 return; 3193 } 3194 auto *C = S.getSingleClause<OMPSIMDClause>(); 3195 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, 3196 PrePostActionTy &Action) { 3197 if (C) { 3198 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 3199 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3200 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3201 auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); 3202 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), 3203 OutlinedFn, CapturedVars); 3204 } else { 3205 Action.Enter(CGF); 3206 CGF.EmitStmt( 3207 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3208 } 3209 }; 3210 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3211 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); 3212 } 3213 3214 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 3215 QualType SrcType, QualType DestType, 3216 SourceLocation Loc) { 3217 assert(CGF.hasScalarEvaluationKind(DestType) && 3218 "DestType must have scalar evaluation kind."); 3219 assert(!Val.isAggregate() && "Must be a scalar or complex."); 3220 return Val.isScalar() 3221 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, 3222 Loc) 3223 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 3224 DestType, Loc); 3225 } 3226 3227 static CodeGenFunction::ComplexPairTy 3228 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 3229 QualType DestType, SourceLocation Loc) { 3230 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 3231 "DestType must have complex evaluation kind."); 3232 CodeGenFunction::ComplexPairTy ComplexVal; 3233 if (Val.isScalar()) { 3234 // Convert the input element to the element type of the complex. 3235 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 3236 auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 3237 DestElementType, Loc); 3238 ComplexVal = CodeGenFunction::ComplexPairTy( 3239 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 3240 } else { 3241 assert(Val.isComplex() && "Must be a scalar or complex."); 3242 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 3243 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 3244 ComplexVal.first = CGF.EmitScalarConversion( 3245 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 3246 ComplexVal.second = CGF.EmitScalarConversion( 3247 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 3248 } 3249 return ComplexVal; 3250 } 3251 3252 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 3253 LValue LVal, RValue RVal) { 3254 if (LVal.isGlobalReg()) { 3255 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 3256 } else { 3257 CGF.EmitAtomicStore(RVal, LVal, 3258 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3259 : llvm::AtomicOrdering::Monotonic, 3260 LVal.isVolatile(), /*IsInit=*/false); 3261 } 3262 } 3263 3264 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, 3265 QualType RValTy, SourceLocation Loc) { 3266 switch (getEvaluationKind(LVal.getType())) { 3267 case TEK_Scalar: 3268 EmitStoreThroughLValue(RValue::get(convertToScalarValue( 3269 *this, RVal, RValTy, LVal.getType(), Loc)), 3270 LVal); 3271 break; 3272 case TEK_Complex: 3273 EmitStoreOfComplex( 3274 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, 3275 /*isInit=*/false); 3276 break; 3277 case TEK_Aggregate: 3278 llvm_unreachable("Must be a scalar or complex."); 3279 } 3280 } 3281 3282 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 3283 const Expr *X, const Expr *V, 3284 SourceLocation Loc) { 3285 // v = x; 3286 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 3287 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 3288 LValue XLValue = CGF.EmitLValue(X); 3289 LValue VLValue = CGF.EmitLValue(V); 3290 RValue Res = XLValue.isGlobalReg() 3291 ? CGF.EmitLoadOfLValue(XLValue, Loc) 3292 : CGF.EmitAtomicLoad( 3293 XLValue, Loc, 3294 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3295 : llvm::AtomicOrdering::Monotonic, 3296 XLValue.isVolatile()); 3297 // OpenMP, 2.12.6, atomic Construct 3298 // Any atomic construct with a seq_cst clause forces the atomically 3299 // performed operation to include an implicit flush operation without a 3300 // list. 3301 if (IsSeqCst) 3302 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3303 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); 3304 } 3305 3306 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 3307 const Expr *X, const Expr *E, 3308 SourceLocation Loc) { 3309 // x = expr; 3310 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 3311 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); 3312 // OpenMP, 2.12.6, atomic Construct 3313 // Any atomic construct with a seq_cst clause forces the atomically 3314 // performed operation to include an implicit flush operation without a 3315 // list. 3316 if (IsSeqCst) 3317 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3318 } 3319 3320 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, 3321 RValue Update, 3322 BinaryOperatorKind BO, 3323 llvm::AtomicOrdering AO, 3324 bool IsXLHSInRHSPart) { 3325 auto &Context = CGF.CGM.getContext(); 3326 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' 3327 // expression is simple and atomic is allowed for the given type for the 3328 // target platform. 3329 if (BO == BO_Comma || !Update.isScalar() || 3330 !Update.getScalarVal()->getType()->isIntegerTy() || 3331 !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) && 3332 (Update.getScalarVal()->getType() != 3333 X.getAddress().getElementType())) || 3334 !X.getAddress().getElementType()->isIntegerTy() || 3335 !Context.getTargetInfo().hasBuiltinAtomic( 3336 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) 3337 return std::make_pair(false, RValue::get(nullptr)); 3338 3339 llvm::AtomicRMWInst::BinOp RMWOp; 3340 switch (BO) { 3341 case BO_Add: 3342 RMWOp = llvm::AtomicRMWInst::Add; 3343 break; 3344 case BO_Sub: 3345 if (!IsXLHSInRHSPart) 3346 return std::make_pair(false, RValue::get(nullptr)); 3347 RMWOp = llvm::AtomicRMWInst::Sub; 3348 break; 3349 case BO_And: 3350 RMWOp = llvm::AtomicRMWInst::And; 3351 break; 3352 case BO_Or: 3353 RMWOp = llvm::AtomicRMWInst::Or; 3354 break; 3355 case BO_Xor: 3356 RMWOp = llvm::AtomicRMWInst::Xor; 3357 break; 3358 case BO_LT: 3359 RMWOp = X.getType()->hasSignedIntegerRepresentation() 3360 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min 3361 : llvm::AtomicRMWInst::Max) 3362 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin 3363 : llvm::AtomicRMWInst::UMax); 3364 break; 3365 case BO_GT: 3366 RMWOp = X.getType()->hasSignedIntegerRepresentation() 3367 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max 3368 : llvm::AtomicRMWInst::Min) 3369 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax 3370 : llvm::AtomicRMWInst::UMin); 3371 break; 3372 case BO_Assign: 3373 RMWOp = llvm::AtomicRMWInst::Xchg; 3374 break; 3375 case BO_Mul: 3376 case BO_Div: 3377 case BO_Rem: 3378 case BO_Shl: 3379 case BO_Shr: 3380 case BO_LAnd: 3381 case BO_LOr: 3382 return std::make_pair(false, RValue::get(nullptr)); 3383 case BO_PtrMemD: 3384 case BO_PtrMemI: 3385 case BO_LE: 3386 case BO_GE: 3387 case BO_EQ: 3388 case BO_NE: 3389 case BO_AddAssign: 3390 case BO_SubAssign: 3391 case BO_AndAssign: 3392 case BO_OrAssign: 3393 case BO_XorAssign: 3394 case BO_MulAssign: 3395 case BO_DivAssign: 3396 case BO_RemAssign: 3397 case BO_ShlAssign: 3398 case BO_ShrAssign: 3399 case BO_Comma: 3400 llvm_unreachable("Unsupported atomic update operation"); 3401 } 3402 auto *UpdateVal = Update.getScalarVal(); 3403 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) { 3404 UpdateVal = CGF.Builder.CreateIntCast( 3405 IC, X.getAddress().getElementType(), 3406 X.getType()->hasSignedIntegerRepresentation()); 3407 } 3408 auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO); 3409 return std::make_pair(true, RValue::get(Res)); 3410 } 3411 3412 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( 3413 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, 3414 llvm::AtomicOrdering AO, SourceLocation Loc, 3415 const llvm::function_ref<RValue(RValue)> &CommonGen) { 3416 // Update expressions are allowed to have the following forms: 3417 // x binop= expr; -> xrval + expr; 3418 // x++, ++x -> xrval + 1; 3419 // x--, --x -> xrval - 1; 3420 // x = x binop expr; -> xrval binop expr 3421 // x = expr Op x; - > expr binop xrval; 3422 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart); 3423 if (!Res.first) { 3424 if (X.isGlobalReg()) { 3425 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop 3426 // 'xrval'. 3427 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X); 3428 } else { 3429 // Perform compare-and-swap procedure. 3430 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified()); 3431 } 3432 } 3433 return Res; 3434 } 3435 3436 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, 3437 const Expr *X, const Expr *E, 3438 const Expr *UE, bool IsXLHSInRHSPart, 3439 SourceLocation Loc) { 3440 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 3441 "Update expr in 'atomic update' must be a binary operator."); 3442 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 3443 // Update expressions are allowed to have the following forms: 3444 // x binop= expr; -> xrval + expr; 3445 // x++, ++x -> xrval + 1; 3446 // x--, --x -> xrval - 1; 3447 // x = x binop expr; -> xrval binop expr 3448 // x = expr Op x; - > expr binop xrval; 3449 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); 3450 LValue XLValue = CGF.EmitLValue(X); 3451 RValue ExprRValue = CGF.EmitAnyExpr(E); 3452 auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3453 : llvm::AtomicOrdering::Monotonic; 3454 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 3455 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 3456 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 3457 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 3458 auto Gen = 3459 [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue { 3460 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3461 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 3462 return CGF.EmitAnyExpr(UE); 3463 }; 3464 (void)CGF.EmitOMPAtomicSimpleUpdateExpr( 3465 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 3466 // OpenMP, 2.12.6, atomic Construct 3467 // Any atomic construct with a seq_cst clause forces the atomically 3468 // performed operation to include an implicit flush operation without a 3469 // list. 3470 if (IsSeqCst) 3471 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3472 } 3473 3474 static RValue convertToType(CodeGenFunction &CGF, RValue Value, 3475 QualType SourceType, QualType ResType, 3476 SourceLocation Loc) { 3477 switch (CGF.getEvaluationKind(ResType)) { 3478 case TEK_Scalar: 3479 return RValue::get( 3480 convertToScalarValue(CGF, Value, SourceType, ResType, Loc)); 3481 case TEK_Complex: { 3482 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc); 3483 return RValue::getComplex(Res.first, Res.second); 3484 } 3485 case TEK_Aggregate: 3486 break; 3487 } 3488 llvm_unreachable("Must be a scalar or complex."); 3489 } 3490 3491 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, 3492 bool IsPostfixUpdate, const Expr *V, 3493 const Expr *X, const Expr *E, 3494 const Expr *UE, bool IsXLHSInRHSPart, 3495 SourceLocation Loc) { 3496 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue"); 3497 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue"); 3498 RValue NewVVal; 3499 LValue VLValue = CGF.EmitLValue(V); 3500 LValue XLValue = CGF.EmitLValue(X); 3501 RValue ExprRValue = CGF.EmitAnyExpr(E); 3502 auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3503 : llvm::AtomicOrdering::Monotonic; 3504 QualType NewVValType; 3505 if (UE) { 3506 // 'x' is updated with some additional value. 3507 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 3508 "Update expr in 'atomic capture' must be a binary operator."); 3509 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 3510 // Update expressions are allowed to have the following forms: 3511 // x binop= expr; -> xrval + expr; 3512 // x++, ++x -> xrval + 1; 3513 // x--, --x -> xrval - 1; 3514 // x = x binop expr; -> xrval binop expr 3515 // x = expr Op x; - > expr binop xrval; 3516 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 3517 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 3518 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 3519 NewVValType = XRValExpr->getType(); 3520 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 3521 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, 3522 IsPostfixUpdate](RValue XRValue) -> RValue { 3523 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3524 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 3525 RValue Res = CGF.EmitAnyExpr(UE); 3526 NewVVal = IsPostfixUpdate ? XRValue : Res; 3527 return Res; 3528 }; 3529 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3530 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 3531 if (Res.first) { 3532 // 'atomicrmw' instruction was generated. 3533 if (IsPostfixUpdate) { 3534 // Use old value from 'atomicrmw'. 3535 NewVVal = Res.second; 3536 } else { 3537 // 'atomicrmw' does not provide new value, so evaluate it using old 3538 // value of 'x'. 3539 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3540 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 3541 NewVVal = CGF.EmitAnyExpr(UE); 3542 } 3543 } 3544 } else { 3545 // 'x' is simply rewritten with some 'expr'. 3546 NewVValType = X->getType().getNonReferenceType(); 3547 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 3548 X->getType().getNonReferenceType(), Loc); 3549 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue { 3550 NewVVal = XRValue; 3551 return ExprRValue; 3552 }; 3553 // Try to perform atomicrmw xchg, otherwise simple exchange. 3554 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3555 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 3556 Loc, Gen); 3557 if (Res.first) { 3558 // 'atomicrmw' instruction was generated. 3559 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 3560 } 3561 } 3562 // Emit post-update store to 'v' of old/new 'x' value. 3563 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); 3564 // OpenMP, 2.12.6, atomic Construct 3565 // Any atomic construct with a seq_cst clause forces the atomically 3566 // performed operation to include an implicit flush operation without a 3567 // list. 3568 if (IsSeqCst) 3569 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3570 } 3571 3572 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 3573 bool IsSeqCst, bool IsPostfixUpdate, 3574 const Expr *X, const Expr *V, const Expr *E, 3575 const Expr *UE, bool IsXLHSInRHSPart, 3576 SourceLocation Loc) { 3577 switch (Kind) { 3578 case OMPC_read: 3579 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 3580 break; 3581 case OMPC_write: 3582 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 3583 break; 3584 case OMPC_unknown: 3585 case OMPC_update: 3586 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 3587 break; 3588 case OMPC_capture: 3589 EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 3590 IsXLHSInRHSPart, Loc); 3591 break; 3592 case OMPC_if: 3593 case OMPC_final: 3594 case OMPC_num_threads: 3595 case OMPC_private: 3596 case OMPC_firstprivate: 3597 case OMPC_lastprivate: 3598 case OMPC_reduction: 3599 case OMPC_task_reduction: 3600 case OMPC_in_reduction: 3601 case OMPC_safelen: 3602 case OMPC_simdlen: 3603 case OMPC_collapse: 3604 case OMPC_default: 3605 case OMPC_seq_cst: 3606 case OMPC_shared: 3607 case OMPC_linear: 3608 case OMPC_aligned: 3609 case OMPC_copyin: 3610 case OMPC_copyprivate: 3611 case OMPC_flush: 3612 case OMPC_proc_bind: 3613 case OMPC_schedule: 3614 case OMPC_ordered: 3615 case OMPC_nowait: 3616 case OMPC_untied: 3617 case OMPC_threadprivate: 3618 case OMPC_depend: 3619 case OMPC_mergeable: 3620 case OMPC_device: 3621 case OMPC_threads: 3622 case OMPC_simd: 3623 case OMPC_map: 3624 case OMPC_num_teams: 3625 case OMPC_thread_limit: 3626 case OMPC_priority: 3627 case OMPC_grainsize: 3628 case OMPC_nogroup: 3629 case OMPC_num_tasks: 3630 case OMPC_hint: 3631 case OMPC_dist_schedule: 3632 case OMPC_defaultmap: 3633 case OMPC_uniform: 3634 case OMPC_to: 3635 case OMPC_from: 3636 case OMPC_use_device_ptr: 3637 case OMPC_is_device_ptr: 3638 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 3639 } 3640 } 3641 3642 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 3643 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); 3644 OpenMPClauseKind Kind = OMPC_unknown; 3645 for (auto *C : S.clauses()) { 3646 // Find first clause (skip seq_cst clause, if it is first). 3647 if (C->getClauseKind() != OMPC_seq_cst) { 3648 Kind = C->getClauseKind(); 3649 break; 3650 } 3651 } 3652 3653 const auto *CS = 3654 S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 3655 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) { 3656 enterFullExpression(EWC); 3657 } 3658 // Processing for statements under 'atomic capture'. 3659 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 3660 for (const auto *C : Compound->body()) { 3661 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) { 3662 enterFullExpression(EWC); 3663 } 3664 } 3665 } 3666 3667 auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, 3668 PrePostActionTy &) { 3669 CGF.EmitStopPoint(CS); 3670 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), 3671 S.getV(), S.getExpr(), S.getUpdateExpr(), 3672 S.isXLHSInRHSPart(), S.getLocStart()); 3673 }; 3674 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3675 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); 3676 } 3677 3678 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, 3679 const OMPExecutableDirective &S, 3680 const RegionCodeGenTy &CodeGen) { 3681 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); 3682 CodeGenModule &CGM = CGF.CGM; 3683 const CapturedStmt &CS = *S.getCapturedStmt(OMPD_target); 3684 3685 llvm::Function *Fn = nullptr; 3686 llvm::Constant *FnID = nullptr; 3687 3688 const Expr *IfCond = nullptr; 3689 // Check for the at most one if clause associated with the target region. 3690 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3691 if (C->getNameModifier() == OMPD_unknown || 3692 C->getNameModifier() == OMPD_target) { 3693 IfCond = C->getCondition(); 3694 break; 3695 } 3696 } 3697 3698 // Check if we have any device clause associated with the directive. 3699 const Expr *Device = nullptr; 3700 if (auto *C = S.getSingleClause<OMPDeviceClause>()) { 3701 Device = C->getDevice(); 3702 } 3703 3704 // Check if we have an if clause whose conditional always evaluates to false 3705 // or if we do not have any targets specified. If so the target region is not 3706 // an offload entry point. 3707 bool IsOffloadEntry = true; 3708 if (IfCond) { 3709 bool Val; 3710 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) 3711 IsOffloadEntry = false; 3712 } 3713 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3714 IsOffloadEntry = false; 3715 3716 assert(CGF.CurFuncDecl && "No parent declaration for target region!"); 3717 StringRef ParentName; 3718 // In case we have Ctors/Dtors we use the complete type variant to produce 3719 // the mangling of the device outlined kernel. 3720 if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) 3721 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); 3722 else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) 3723 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); 3724 else 3725 ParentName = 3726 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl))); 3727 3728 // Emit target region as a standalone region. 3729 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, 3730 IsOffloadEntry, CodeGen); 3731 OMPLexicalScope Scope(CGF, S); 3732 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3733 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 3734 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, 3735 CapturedVars); 3736 } 3737 3738 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, 3739 PrePostActionTy &Action) { 3740 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 3741 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3742 CGF.EmitOMPPrivateClause(S, PrivateScope); 3743 (void)PrivateScope.Privatize(); 3744 3745 Action.Enter(CGF); 3746 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3747 } 3748 3749 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, 3750 StringRef ParentName, 3751 const OMPTargetDirective &S) { 3752 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3753 emitTargetRegion(CGF, S, Action); 3754 }; 3755 llvm::Function *Fn; 3756 llvm::Constant *Addr; 3757 // Emit target region as a standalone region. 3758 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3759 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3760 assert(Fn && Addr && "Target device function emission failed."); 3761 } 3762 3763 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 3764 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3765 emitTargetRegion(CGF, S, Action); 3766 }; 3767 emitCommonOMPTargetDirective(*this, S, CodeGen); 3768 } 3769 3770 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, 3771 const OMPExecutableDirective &S, 3772 OpenMPDirectiveKind InnermostKind, 3773 const RegionCodeGenTy &CodeGen) { 3774 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); 3775 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( 3776 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 3777 3778 const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>(); 3779 const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>(); 3780 if (NT || TL) { 3781 Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; 3782 Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; 3783 3784 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, 3785 S.getLocStart()); 3786 } 3787 3788 OMPTeamsScope Scope(CGF, S); 3789 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3790 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3791 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, 3792 CapturedVars); 3793 } 3794 3795 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { 3796 // Emit teams region as a standalone region. 3797 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3798 OMPPrivateScope PrivateScope(CGF); 3799 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3800 CGF.EmitOMPPrivateClause(S, PrivateScope); 3801 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 3802 (void)PrivateScope.Privatize(); 3803 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3804 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 3805 }; 3806 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); 3807 emitPostUpdateForReductionClause( 3808 *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 3809 } 3810 3811 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, 3812 const OMPTargetTeamsDirective &S) { 3813 auto *CS = S.getCapturedStmt(OMPD_teams); 3814 Action.Enter(CGF); 3815 auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 3816 // TODO: Add support for clauses. 3817 CGF.EmitStmt(CS->getCapturedStmt()); 3818 }; 3819 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); 3820 } 3821 3822 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 3823 CodeGenModule &CGM, StringRef ParentName, 3824 const OMPTargetTeamsDirective &S) { 3825 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3826 emitTargetTeamsRegion(CGF, Action, S); 3827 }; 3828 llvm::Function *Fn; 3829 llvm::Constant *Addr; 3830 // Emit target region as a standalone region. 3831 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3832 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3833 assert(Fn && Addr && "Target device function emission failed."); 3834 } 3835 3836 void CodeGenFunction::EmitOMPTargetTeamsDirective( 3837 const OMPTargetTeamsDirective &S) { 3838 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3839 emitTargetTeamsRegion(CGF, Action, S); 3840 }; 3841 emitCommonOMPTargetDirective(*this, S, CodeGen); 3842 } 3843 3844 void CodeGenFunction::EmitOMPTeamsDistributeDirective( 3845 const OMPTeamsDistributeDirective &S) { 3846 3847 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3848 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 3849 }; 3850 3851 // Emit teams region as a standalone region. 3852 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 3853 PrePostActionTy &) { 3854 OMPPrivateScope PrivateScope(CGF); 3855 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 3856 (void)PrivateScope.Privatize(); 3857 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 3858 CodeGenDistribute); 3859 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 3860 }; 3861 emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); 3862 emitPostUpdateForReductionClause(*this, S, 3863 [](CodeGenFunction &) { return nullptr; }); 3864 } 3865 3866 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( 3867 const OMPTeamsDistributeParallelForDirective &S) { 3868 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3869 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 3870 S.getDistInc()); 3871 }; 3872 3873 // Emit teams region as a standalone region. 3874 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 3875 PrePostActionTy &) { 3876 OMPPrivateScope PrivateScope(CGF); 3877 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 3878 (void)PrivateScope.Privatize(); 3879 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 3880 CodeGenDistribute); 3881 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 3882 }; 3883 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); 3884 emitPostUpdateForReductionClause(*this, S, 3885 [](CodeGenFunction &) { return nullptr; }); 3886 } 3887 3888 void CodeGenFunction::EmitOMPCancellationPointDirective( 3889 const OMPCancellationPointDirective &S) { 3890 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), 3891 S.getCancelRegion()); 3892 } 3893 3894 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 3895 const Expr *IfCond = nullptr; 3896 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3897 if (C->getNameModifier() == OMPD_unknown || 3898 C->getNameModifier() == OMPD_cancel) { 3899 IfCond = C->getCondition(); 3900 break; 3901 } 3902 } 3903 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, 3904 S.getCancelRegion()); 3905 } 3906 3907 CodeGenFunction::JumpDest 3908 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 3909 if (Kind == OMPD_parallel || Kind == OMPD_task || 3910 Kind == OMPD_target_parallel) 3911 return ReturnBlock; 3912 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 3913 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || 3914 Kind == OMPD_distribute_parallel_for || 3915 Kind == OMPD_target_parallel_for); 3916 return OMPCancelStack.getExitBlock(); 3917 } 3918 3919 void CodeGenFunction::EmitOMPUseDevicePtrClause( 3920 const OMPClause &NC, OMPPrivateScope &PrivateScope, 3921 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { 3922 const auto &C = cast<OMPUseDevicePtrClause>(NC); 3923 auto OrigVarIt = C.varlist_begin(); 3924 auto InitIt = C.inits().begin(); 3925 for (auto PvtVarIt : C.private_copies()) { 3926 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl()); 3927 auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl()); 3928 auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl()); 3929 3930 // In order to identify the right initializer we need to match the 3931 // declaration used by the mapping logic. In some cases we may get 3932 // OMPCapturedExprDecl that refers to the original declaration. 3933 const ValueDecl *MatchingVD = OrigVD; 3934 if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) { 3935 // OMPCapturedExprDecl are used to privative fields of the current 3936 // structure. 3937 auto *ME = cast<MemberExpr>(OED->getInit()); 3938 assert(isa<CXXThisExpr>(ME->getBase()) && 3939 "Base should be the current struct!"); 3940 MatchingVD = ME->getMemberDecl(); 3941 } 3942 3943 // If we don't have information about the current list item, move on to 3944 // the next one. 3945 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD); 3946 if (InitAddrIt == CaptureDeviceAddrMap.end()) 3947 continue; 3948 3949 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 3950 // Initialize the temporary initialization variable with the address we 3951 // get from the runtime library. We have to cast the source address 3952 // because it is always a void *. References are materialized in the 3953 // privatization scope, so the initialization here disregards the fact 3954 // the original variable is a reference. 3955 QualType AddrQTy = 3956 getContext().getPointerType(OrigVD->getType().getNonReferenceType()); 3957 llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy); 3958 Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy); 3959 setAddrOfLocalVar(InitVD, InitAddr); 3960 3961 // Emit private declaration, it will be initialized by the value we 3962 // declaration we just added to the local declarations map. 3963 EmitDecl(*PvtVD); 3964 3965 // The initialization variables reached its purpose in the emission 3966 // ofthe previous declaration, so we don't need it anymore. 3967 LocalDeclMap.erase(InitVD); 3968 3969 // Return the address of the private variable. 3970 return GetAddrOfLocalVar(PvtVD); 3971 }); 3972 assert(IsRegistered && "firstprivate var already registered as private"); 3973 // Silence the warning about unused variable. 3974 (void)IsRegistered; 3975 3976 ++OrigVarIt; 3977 ++InitIt; 3978 } 3979 } 3980 3981 // Generate the instructions for '#pragma omp target data' directive. 3982 void CodeGenFunction::EmitOMPTargetDataDirective( 3983 const OMPTargetDataDirective &S) { 3984 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true); 3985 3986 // Create a pre/post action to signal the privatization of the device pointer. 3987 // This action can be replaced by the OpenMP runtime code generation to 3988 // deactivate privatization. 3989 bool PrivatizeDevicePointers = false; 3990 class DevicePointerPrivActionTy : public PrePostActionTy { 3991 bool &PrivatizeDevicePointers; 3992 3993 public: 3994 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers) 3995 : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {} 3996 void Enter(CodeGenFunction &CGF) override { 3997 PrivatizeDevicePointers = true; 3998 } 3999 }; 4000 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers); 4001 4002 auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers]( 4003 CodeGenFunction &CGF, PrePostActionTy &Action) { 4004 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4005 CGF.EmitStmt( 4006 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 4007 }; 4008 4009 // Codegen that selects wheather to generate the privatization code or not. 4010 auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers, 4011 &InnermostCodeGen](CodeGenFunction &CGF, 4012 PrePostActionTy &Action) { 4013 RegionCodeGenTy RCG(InnermostCodeGen); 4014 PrivatizeDevicePointers = false; 4015 4016 // Call the pre-action to change the status of PrivatizeDevicePointers if 4017 // needed. 4018 Action.Enter(CGF); 4019 4020 if (PrivatizeDevicePointers) { 4021 OMPPrivateScope PrivateScope(CGF); 4022 // Emit all instances of the use_device_ptr clause. 4023 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) 4024 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope, 4025 Info.CaptureDeviceAddrMap); 4026 (void)PrivateScope.Privatize(); 4027 RCG(CGF); 4028 } else 4029 RCG(CGF); 4030 }; 4031 4032 // Forward the provided action to the privatization codegen. 4033 RegionCodeGenTy PrivRCG(PrivCodeGen); 4034 PrivRCG.setAction(Action); 4035 4036 // Notwithstanding the body of the region is emitted as inlined directive, 4037 // we don't use an inline scope as changes in the references inside the 4038 // region are expected to be visible outside, so we do not privative them. 4039 OMPLexicalScope Scope(CGF, S); 4040 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, 4041 PrivRCG); 4042 }; 4043 4044 RegionCodeGenTy RCG(CodeGen); 4045 4046 // If we don't have target devices, don't bother emitting the data mapping 4047 // code. 4048 if (CGM.getLangOpts().OMPTargetTriples.empty()) { 4049 RCG(*this); 4050 return; 4051 } 4052 4053 // Check if we have any if clause associated with the directive. 4054 const Expr *IfCond = nullptr; 4055 if (auto *C = S.getSingleClause<OMPIfClause>()) 4056 IfCond = C->getCondition(); 4057 4058 // Check if we have any device clause associated with the directive. 4059 const Expr *Device = nullptr; 4060 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 4061 Device = C->getDevice(); 4062 4063 // Set the action to signal privatization of device pointers. 4064 RCG.setAction(PrivAction); 4065 4066 // Emit region code. 4067 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG, 4068 Info); 4069 } 4070 4071 void CodeGenFunction::EmitOMPTargetEnterDataDirective( 4072 const OMPTargetEnterDataDirective &S) { 4073 // If we don't have target devices, don't bother emitting the data mapping 4074 // code. 4075 if (CGM.getLangOpts().OMPTargetTriples.empty()) 4076 return; 4077 4078 // Check if we have any if clause associated with the directive. 4079 const Expr *IfCond = nullptr; 4080 if (auto *C = S.getSingleClause<OMPIfClause>()) 4081 IfCond = C->getCondition(); 4082 4083 // Check if we have any device clause associated with the directive. 4084 const Expr *Device = nullptr; 4085 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 4086 Device = C->getDevice(); 4087 4088 auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF, 4089 PrePostActionTy &) { 4090 CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond, 4091 Device); 4092 }; 4093 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 4094 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_enter_data, 4095 CodeGen); 4096 } 4097 4098 void CodeGenFunction::EmitOMPTargetExitDataDirective( 4099 const OMPTargetExitDataDirective &S) { 4100 // If we don't have target devices, don't bother emitting the data mapping 4101 // code. 4102 if (CGM.getLangOpts().OMPTargetTriples.empty()) 4103 return; 4104 4105 // Check if we have any if clause associated with the directive. 4106 const Expr *IfCond = nullptr; 4107 if (auto *C = S.getSingleClause<OMPIfClause>()) 4108 IfCond = C->getCondition(); 4109 4110 // Check if we have any device clause associated with the directive. 4111 const Expr *Device = nullptr; 4112 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 4113 Device = C->getDevice(); 4114 4115 auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF, 4116 PrePostActionTy &) { 4117 CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond, 4118 Device); 4119 }; 4120 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 4121 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_exit_data, 4122 CodeGen); 4123 } 4124 4125 static void emitTargetParallelRegion(CodeGenFunction &CGF, 4126 const OMPTargetParallelDirective &S, 4127 PrePostActionTy &Action) { 4128 // Get the captured statement associated with the 'parallel' region. 4129 auto *CS = S.getCapturedStmt(OMPD_parallel); 4130 Action.Enter(CGF); 4131 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) { 4132 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4133 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4134 CGF.EmitOMPPrivateClause(S, PrivateScope); 4135 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4136 (void)PrivateScope.Privatize(); 4137 // TODO: Add support for clauses. 4138 CGF.EmitStmt(CS->getCapturedStmt()); 4139 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 4140 }; 4141 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, 4142 emitEmptyBoundParameters); 4143 emitPostUpdateForReductionClause( 4144 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 4145 } 4146 4147 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 4148 CodeGenModule &CGM, StringRef ParentName, 4149 const OMPTargetParallelDirective &S) { 4150 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4151 emitTargetParallelRegion(CGF, S, Action); 4152 }; 4153 llvm::Function *Fn; 4154 llvm::Constant *Addr; 4155 // Emit target region as a standalone region. 4156 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4157 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4158 assert(Fn && Addr && "Target device function emission failed."); 4159 } 4160 4161 void CodeGenFunction::EmitOMPTargetParallelDirective( 4162 const OMPTargetParallelDirective &S) { 4163 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4164 emitTargetParallelRegion(CGF, S, Action); 4165 }; 4166 emitCommonOMPTargetDirective(*this, S, CodeGen); 4167 } 4168 4169 static void emitTargetParallelForRegion(CodeGenFunction &CGF, 4170 const OMPTargetParallelForDirective &S, 4171 PrePostActionTy &Action) { 4172 Action.Enter(CGF); 4173 // Emit directive as a combined directive that consists of two implicit 4174 // directives: 'parallel' with 'for' directive. 4175 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4176 CodeGenFunction::OMPCancelStackRAII CancelRegion( 4177 CGF, OMPD_target_parallel_for, S.hasCancel()); 4178 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 4179 emitDispatchForLoopBounds); 4180 }; 4181 emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, 4182 emitEmptyBoundParameters); 4183 } 4184 4185 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 4186 CodeGenModule &CGM, StringRef ParentName, 4187 const OMPTargetParallelForDirective &S) { 4188 // Emit SPMD target parallel for region as a standalone region. 4189 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4190 emitTargetParallelForRegion(CGF, S, Action); 4191 }; 4192 llvm::Function *Fn; 4193 llvm::Constant *Addr; 4194 // Emit target region as a standalone region. 4195 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4196 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4197 assert(Fn && Addr && "Target device function emission failed."); 4198 } 4199 4200 void CodeGenFunction::EmitOMPTargetParallelForDirective( 4201 const OMPTargetParallelForDirective &S) { 4202 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4203 emitTargetParallelForRegion(CGF, S, Action); 4204 }; 4205 emitCommonOMPTargetDirective(*this, S, CodeGen); 4206 } 4207 4208 static void 4209 emitTargetParallelForSimdRegion(CodeGenFunction &CGF, 4210 const OMPTargetParallelForSimdDirective &S, 4211 PrePostActionTy &Action) { 4212 Action.Enter(CGF); 4213 // Emit directive as a combined directive that consists of two implicit 4214 // directives: 'parallel' with 'for' directive. 4215 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4216 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 4217 emitDispatchForLoopBounds); 4218 }; 4219 emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen, 4220 emitEmptyBoundParameters); 4221 } 4222 4223 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 4224 CodeGenModule &CGM, StringRef ParentName, 4225 const OMPTargetParallelForSimdDirective &S) { 4226 // Emit SPMD target parallel for region as a standalone region. 4227 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4228 emitTargetParallelForSimdRegion(CGF, S, Action); 4229 }; 4230 llvm::Function *Fn; 4231 llvm::Constant *Addr; 4232 // Emit target region as a standalone region. 4233 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4234 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4235 assert(Fn && Addr && "Target device function emission failed."); 4236 } 4237 4238 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( 4239 const OMPTargetParallelForSimdDirective &S) { 4240 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4241 emitTargetParallelForSimdRegion(CGF, S, Action); 4242 }; 4243 emitCommonOMPTargetDirective(*this, S, CodeGen); 4244 } 4245 4246 /// Emit a helper variable and return corresponding lvalue. 4247 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, 4248 const ImplicitParamDecl *PVD, 4249 CodeGenFunction::OMPPrivateScope &Privates) { 4250 auto *VDecl = cast<VarDecl>(Helper->getDecl()); 4251 Privates.addPrivate( 4252 VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); }); 4253 } 4254 4255 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { 4256 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); 4257 // Emit outlined function for task construct. 4258 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 4259 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 4260 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 4261 const Expr *IfCond = nullptr; 4262 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 4263 if (C->getNameModifier() == OMPD_unknown || 4264 C->getNameModifier() == OMPD_taskloop) { 4265 IfCond = C->getCondition(); 4266 break; 4267 } 4268 } 4269 4270 OMPTaskDataTy Data; 4271 // Check if taskloop must be emitted without taskgroup. 4272 Data.Nogroup = S.getSingleClause<OMPNogroupClause>(); 4273 // TODO: Check if we should emit tied or untied task. 4274 Data.Tied = true; 4275 // Set scheduling for taskloop 4276 if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) { 4277 // grainsize clause 4278 Data.Schedule.setInt(/*IntVal=*/false); 4279 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize())); 4280 } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) { 4281 // num_tasks clause 4282 Data.Schedule.setInt(/*IntVal=*/true); 4283 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks())); 4284 } 4285 4286 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { 4287 // if (PreCond) { 4288 // for (IV in 0..LastIteration) BODY; 4289 // <Final counter/linear vars updates>; 4290 // } 4291 // 4292 4293 // Emit: if (PreCond) - begin. 4294 // If the condition constant folds and can be elided, avoid emitting the 4295 // whole loop. 4296 bool CondConstant; 4297 llvm::BasicBlock *ContBlock = nullptr; 4298 OMPLoopScope PreInitScope(CGF, S); 4299 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 4300 if (!CondConstant) 4301 return; 4302 } else { 4303 auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); 4304 ContBlock = CGF.createBasicBlock("taskloop.if.end"); 4305 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 4306 CGF.getProfileCount(&S)); 4307 CGF.EmitBlock(ThenBlock); 4308 CGF.incrementProfileCounter(&S); 4309 } 4310 4311 if (isOpenMPSimdDirective(S.getDirectiveKind())) 4312 CGF.EmitOMPSimdInit(S); 4313 4314 OMPPrivateScope LoopScope(CGF); 4315 // Emit helper vars inits. 4316 enum { LowerBound = 5, UpperBound, Stride, LastIter }; 4317 auto *I = CS->getCapturedDecl()->param_begin(); 4318 auto *LBP = std::next(I, LowerBound); 4319 auto *UBP = std::next(I, UpperBound); 4320 auto *STP = std::next(I, Stride); 4321 auto *LIP = std::next(I, LastIter); 4322 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP, 4323 LoopScope); 4324 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP, 4325 LoopScope); 4326 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope); 4327 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, 4328 LoopScope); 4329 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 4330 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 4331 (void)LoopScope.Privatize(); 4332 // Emit the loop iteration variable. 4333 const Expr *IVExpr = S.getIterationVariable(); 4334 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 4335 CGF.EmitVarDecl(*IVDecl); 4336 CGF.EmitIgnoredExpr(S.getInit()); 4337 4338 // Emit the iterations count variable. 4339 // If it is not a variable, Sema decided to calculate iterations count on 4340 // each iteration (e.g., it is foldable into a constant). 4341 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 4342 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 4343 // Emit calculation of the iterations count. 4344 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 4345 } 4346 4347 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 4348 S.getInc(), 4349 [&S](CodeGenFunction &CGF) { 4350 CGF.EmitOMPLoopBody(S, JumpDest()); 4351 CGF.EmitStopPoint(&S); 4352 }, 4353 [](CodeGenFunction &) {}); 4354 // Emit: if (PreCond) - end. 4355 if (ContBlock) { 4356 CGF.EmitBranch(ContBlock); 4357 CGF.EmitBlock(ContBlock, true); 4358 } 4359 // Emit final copy of the lastprivate variables if IsLastIter != 0. 4360 if (HasLastprivateClause) { 4361 CGF.EmitOMPLastprivateClauseFinal( 4362 S, isOpenMPSimdDirective(S.getDirectiveKind()), 4363 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( 4364 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 4365 (*LIP)->getType(), S.getLocStart()))); 4366 } 4367 }; 4368 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 4369 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 4370 const OMPTaskDataTy &Data) { 4371 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) { 4372 OMPLoopScope PreInitScope(CGF, S); 4373 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S, 4374 OutlinedFn, SharedsTy, 4375 CapturedStruct, IfCond, Data); 4376 }; 4377 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, 4378 CodeGen); 4379 }; 4380 if (Data.Nogroup) 4381 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 4382 else { 4383 CGM.getOpenMPRuntime().emitTaskgroupRegion( 4384 *this, 4385 [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, 4386 PrePostActionTy &Action) { 4387 Action.Enter(CGF); 4388 CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 4389 }, 4390 S.getLocStart()); 4391 } 4392 } 4393 4394 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 4395 EmitOMPTaskLoopBasedDirective(S); 4396 } 4397 4398 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 4399 const OMPTaskLoopSimdDirective &S) { 4400 EmitOMPTaskLoopBasedDirective(S); 4401 } 4402 4403 // Generate the instructions for '#pragma omp target update' directive. 4404 void CodeGenFunction::EmitOMPTargetUpdateDirective( 4405 const OMPTargetUpdateDirective &S) { 4406 // If we don't have target devices, don't bother emitting the data mapping 4407 // code. 4408 if (CGM.getLangOpts().OMPTargetTriples.empty()) 4409 return; 4410 4411 // Check if we have any if clause associated with the directive. 4412 const Expr *IfCond = nullptr; 4413 if (auto *C = S.getSingleClause<OMPIfClause>()) 4414 IfCond = C->getCondition(); 4415 4416 // Check if we have any device clause associated with the directive. 4417 const Expr *Device = nullptr; 4418 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 4419 Device = C->getDevice(); 4420 4421 auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF, 4422 PrePostActionTy &) { 4423 CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond, 4424 Device); 4425 }; 4426 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 4427 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_update, 4428 CodeGen); 4429 } 4430