//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/DeclOpenMP.h"
#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs; handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

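// Note: the scopes below suppress pre-init emission for combined 'target'
// constructs (and, for parallel scopes, for bound-sharing 'distribute
// parallel for' directives), presumably because the captured-clause pre-init
// expressions are already emitted as part of the enclosing region and must
// not be re-emitted here.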
/// Lexical scope for the OpenMP parallel construct; handles correct codegen
/// for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct; handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives; supports capturing of
/// expressions used in the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    for (auto *E : S.counters()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      (void)PreCondScope.addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType().getNonReferenceType());
      });
    }
    (void)PreCondScope.Privatize();
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
        for (const auto *I : PreInits->decls())
          CGF.EmitVarDecl(cast<VarDecl>(*I));
      }
    }
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
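    // For 'taskgroup' with a 'task_reduction' clause, materialize the
    // internal reduction-descriptor variable up front so the inlined body
    // can refer to it.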
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress();
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it back as a uintptr-sized value.
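      // E.g. for a captured 'int x', this emits (roughly):
      //   %x.casted = alloca i64                 ; uintptr temporary
      //   store i32 %x, i32* <%x.casted as i32*> ; store via the source type
      //   %cv = load i64, i64* %x.casted         ; reload as uintptr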
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(AddrLV.getAddress().getPointer(),
                                             Ctx.getUIntPtrType(),
                                             Ctx.getPointerType(DstType), Loc);
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references, we need to return the address of the
  // reference itself rather than the address of the referenced value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
  }

  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType()) {
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  }
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (auto *A = T->getAsArrayTypeUnsafe()) {
    if (auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    else if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly,
                           StringRef FunctionName)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getLocStart(),
        SourceLocation(), DeclarationName(), Ctx.VoidTy,
        Ctx.getTrivialTypeSourceInfo(
            Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)),
        SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. VLA type sizes are passed to the outlined
    // function in the same way.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType()) {
      if (FO.UIntPtrCastRequired)
        ArgType = Ctx.getUIntPtrType();
    }

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &Ctx.Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getLocStart() : FD->getLocStart(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit the function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.S->getLocStart(), CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    // Do not map arguments if we emit the function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything,
    // just use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CGF.CreateMemTemp(
            CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
        CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
                              /*Volatile=*/false, CurVD->getType());
        LocalAddr = RefAddr;
      }
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      auto *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      auto VAT = FD->getCapturedVLAType();
      VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
        } else if (!VarTy->isVariablyModifiedType() ||
                   !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = CGF.EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      LocalAddrs.insert(
          {Args[Cnt],
           {Var, FO.UIntPtrCastRequired
                     ? castValueFromUintptr(CGF, I->getLocation(),
                                            FD->getType(),
                                            Args[Cnt]->getName(), ArgLVal,
                                            VarTy->isReferenceType())
                     : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() &&
      CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str());
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      setAddrOfLocalVar(LocalAddrPair.second.first,
                        LocalAddrPair.second.second);
    }
  }
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName());
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
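      // Load the casted local back and forward it as an argument to the
      // debug-info-friendly outlined function emitted above.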
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end())
        CallArg = EI->second.second;
      else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
                                                  F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element-by-element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // We are working with a single array element, so we have to remap
            // the destination and source variables to the corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function, like omp for, omp simd, omp distribute etc.
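  // (A single OMPD_unknown capture region means the directive body is emitted
  // inline rather than outlined, so the copy cannot be deferred to an
  // outlined function's arguments.)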
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal = EmitLValue(&DRE);
        Address OriginalAddr = OriginalLVal.getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(),
                                           Type);
              EmitAggregateAssign(Dest, OriginalLVal, Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap the temp VDInit variable to the address of the original
            // variable (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If
          // it is, there is no need to copy the data.
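          // In the master thread the threadprivate copy and the master copy
          // alias, so comparing the two addresses identifies the master;
          // only non-master threads perform the copy.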
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
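        // For a by-reference private copy, load the reference first so the
        // copy below reads from the referenced object.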
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const auto *IRef : Shareds) {
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    auto Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
        return GetAddrOfLocalVar(PrivateVD);
      });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
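      // The LHS maps to the original shared object; the RHS maps to the
      // private copy, cast to the element type expected by the reduction op.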
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(
          LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit nowait reduction if the nowait clause is present or the directive
    // is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (auto *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace

static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute'
  // chunk's lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize the threads and avoid data
      // races when propagating the master thread's values of threadprivate
      // variables to the local instances in all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values on the current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, no
  // need to generate the code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (auto *U : C->updates())
        EmitIgnoredExpr(U);
    }
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      HasLinears = true;
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}

void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    (void)LoopScope.addPrivate(VD, [&]() -> Address {
      // Emit var without initialization.
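      // The private counter may already have been emitted (e.g. by a clause
      // pre-init statement), so only allocate it on first use.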
1481 if (!LocalDeclMap.count(PrivateVD)) { 1482 auto VarEmission = EmitAutoVarAlloca(*PrivateVD); 1483 EmitAutoVarCleanups(VarEmission); 1484 } 1485 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), 1486 /*RefersToEnclosingVariableOrCapture=*/false, 1487 (*I)->getType(), VK_LValue, (*I)->getExprLoc()); 1488 return EmitLValue(&DRE).getAddress(); 1489 }); 1490 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || 1491 VD->hasGlobalStorage()) { 1492 (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address { 1493 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 1494 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), 1495 E->getType(), VK_LValue, E->getExprLoc()); 1496 return EmitLValue(&DRE).getAddress(); 1497 }); 1498 } 1499 ++I; 1500 } 1501 } 1502 1503 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 1504 const Expr *Cond, llvm::BasicBlock *TrueBlock, 1505 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 1506 if (!CGF.HaveInsertPoint()) 1507 return; 1508 { 1509 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 1510 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); 1511 (void)PreCondScope.Privatize(); 1512 // Get initial values of real counters. 1513 for (auto I : S.inits()) { 1514 CGF.EmitIgnoredExpr(I); 1515 } 1516 } 1517 // Check that loop is executed at least one time. 1518 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); 1519 } 1520 1521 void CodeGenFunction::EmitOMPLinearClause( 1522 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { 1523 if (!HaveInsertPoint()) 1524 return; 1525 llvm::DenseSet<const VarDecl *> SIMDLCVs; 1526 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 1527 auto *LoopDirective = cast<OMPLoopDirective>(&D); 1528 for (auto *C : LoopDirective->counters()) { 1529 SIMDLCVs.insert( 1530 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 1531 } 1532 } 1533 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1534 auto CurPrivate = C->privates().begin(); 1535 for (auto *E : C->varlists()) { 1536 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1537 auto *PrivateVD = 1538 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); 1539 if (!SIMDLCVs.count(VD->getCanonicalDecl())) { 1540 bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address { 1541 // Emit private VarDecl with copy init. 1542 EmitVarDecl(*PrivateVD); 1543 return GetAddrOfLocalVar(PrivateVD); 1544 }); 1545 assert(IsRegistered && "linear var already registered as private"); 1546 // Silence the warning about unused variable. 1547 (void)IsRegistered; 1548 } else 1549 EmitVarDecl(*PrivateVD); 1550 ++CurPrivate; 1551 } 1552 } 1553 } 1554 1555 static void emitSimdlenSafelenClause(CodeGenFunction &CGF, 1556 const OMPExecutableDirective &D, 1557 bool IsMonotonic) { 1558 if (!CGF.HaveInsertPoint()) 1559 return; 1560 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { 1561 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 1562 /*ignoreResult=*/true); 1563 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1564 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1565 // In presence of finite 'safelen', it may be unsafe to mark all 1566 // the memory instructions parallel, because loop-carried 1567 // dependences of 'safelen' iterations are possible. 
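// For illustration (editorial sketch, not emitted code): given
//   #pragma omp simd simdlen(8) safelen(16)
// the vectorize width below is set to 8 and, because a 'safelen' clause is
// also present, the loop's memory accesses are not marked parallel.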
1568 if (!IsMonotonic)
1569 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1570 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1571 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1572 /*ignoreResult=*/true);
1573 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1574 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1575 // In presence of finite 'safelen', it may be unsafe to mark all
1576 // the memory instructions parallel, because loop-carried
1577 // dependences of 'safelen' iterations are possible.
1578 CGF.LoopStack.setParallel(false);
1579 }
1580 }
1581 
1582 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1583 bool IsMonotonic) {
1584 // Walk clauses and process safelen/simdlen.
1585 LoopStack.setParallel(!IsMonotonic);
1586 LoopStack.setVectorizeEnable(true);
1587 emitSimdlenSafelenClause(*this, D, IsMonotonic);
1588 }
1589 
1590 void CodeGenFunction::EmitOMPSimdFinal(
1591 const OMPLoopDirective &D,
1592 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1593 if (!HaveInsertPoint())
1594 return;
1595 llvm::BasicBlock *DoneBB = nullptr;
1596 auto IC = D.counters().begin();
1597 auto IPC = D.private_counters().begin();
1598 for (auto F : D.finals()) {
1599 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1600 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1601 auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1602 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1603 OrigVD->hasGlobalStorage() || CED) {
1604 if (!DoneBB) {
1605 if (auto *Cond = CondGen(*this)) {
1606 // Once the first post-update expression is found, emit the
1607 // conditional block if it was requested.
1608 auto *ThenBB = createBasicBlock(".omp.final.then");
1609 DoneBB = createBasicBlock(".omp.final.done");
1610 Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1611 EmitBlock(ThenBB);
1612 }
1613 }
1614 Address OrigAddr = Address::invalid();
1615 if (CED)
1616 OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1617 else {
1618 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1619 /*RefersToEnclosingVariableOrCapture=*/false,
1620 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1621 OrigAddr = EmitLValue(&DRE).getAddress();
1622 }
1623 OMPPrivateScope VarScope(*this);
1624 VarScope.addPrivate(OrigVD,
1625 [OrigAddr]() -> Address { return OrigAddr; });
1626 (void)VarScope.Privatize();
1627 EmitIgnoredExpr(F);
1628 }
1629 ++IC;
1630 ++IPC;
1631 }
1632 if (DoneBB)
1633 EmitBlock(DoneBB, /*IsFinished=*/true);
1634 }
1635 
1636 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
1637 const OMPLoopDirective &S,
1638 CodeGenFunction::JumpDest LoopExit) {
1639 CGF.EmitOMPLoopBody(S, LoopExit);
1640 CGF.EmitStopPoint(&S);
1641 }
1642 
1643 /// Emit a helper variable and return the corresponding lvalue.
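/// Callers use this for the implicitly generated worksharing variables
/// (lower bound, upper bound, stride and the is-last-iteration flag).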
1644 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 1645 const DeclRefExpr *Helper) { 1646 auto VDecl = cast<VarDecl>(Helper->getDecl()); 1647 CGF.EmitVarDecl(*VDecl); 1648 return CGF.EmitLValue(Helper); 1649 } 1650 1651 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, 1652 PrePostActionTy &Action) { 1653 Action.Enter(CGF); 1654 assert(isOpenMPSimdDirective(S.getDirectiveKind()) && 1655 "Expected simd directive"); 1656 OMPLoopScope PreInitScope(CGF, S); 1657 // if (PreCond) { 1658 // for (IV in 0..LastIteration) BODY; 1659 // <Final counter/linear vars updates>; 1660 // } 1661 // 1662 if (isOpenMPDistributeDirective(S.getDirectiveKind()) || 1663 isOpenMPWorksharingDirective(S.getDirectiveKind()) || 1664 isOpenMPTaskLoopDirective(S.getDirectiveKind())) { 1665 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable())); 1666 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable())); 1667 } 1668 1669 // Emit: if (PreCond) - begin. 1670 // If the condition constant folds and can be elided, avoid emitting the 1671 // whole loop. 1672 bool CondConstant; 1673 llvm::BasicBlock *ContBlock = nullptr; 1674 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1675 if (!CondConstant) 1676 return; 1677 } else { 1678 auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); 1679 ContBlock = CGF.createBasicBlock("simd.if.end"); 1680 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 1681 CGF.getProfileCount(&S)); 1682 CGF.EmitBlock(ThenBlock); 1683 CGF.incrementProfileCounter(&S); 1684 } 1685 1686 // Emit the loop iteration variable. 1687 const Expr *IVExpr = S.getIterationVariable(); 1688 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 1689 CGF.EmitVarDecl(*IVDecl); 1690 CGF.EmitIgnoredExpr(S.getInit()); 1691 1692 // Emit the iterations count variable. 1693 // If it is not a variable, Sema decided to calculate iterations count on 1694 // each iteration (e.g., it is foldable into a constant). 1695 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1696 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1697 // Emit calculation of the iterations count. 1698 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 1699 } 1700 1701 CGF.EmitOMPSimdInit(S); 1702 1703 emitAlignedClause(CGF, S); 1704 (void)CGF.EmitOMPLinearClauseInit(S); 1705 { 1706 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 1707 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 1708 CGF.EmitOMPLinearClause(S, LoopScope); 1709 CGF.EmitOMPPrivateClause(S, LoopScope); 1710 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1711 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1712 (void)LoopScope.Privatize(); 1713 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1714 S.getInc(), 1715 [&S](CodeGenFunction &CGF) { 1716 CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); 1717 CGF.EmitStopPoint(&S); 1718 }, 1719 [](CodeGenFunction &) {}); 1720 CGF.EmitOMPSimdFinal( 1721 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1722 // Emit final copy of the lastprivate variables at the end of loops. 
1723 if (HasLastprivateClause) 1724 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 1725 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); 1726 emitPostUpdateForReductionClause( 1727 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1728 } 1729 CGF.EmitOMPLinearClauseFinal( 1730 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1731 // Emit: if (PreCond) - end. 1732 if (ContBlock) { 1733 CGF.EmitBranch(ContBlock); 1734 CGF.EmitBlock(ContBlock, true); 1735 } 1736 } 1737 1738 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 1739 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 1740 emitOMPSimdRegion(CGF, S, Action); 1741 }; 1742 OMPLexicalScope Scope(*this, S, OMPD_unknown); 1743 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1744 } 1745 1746 void CodeGenFunction::EmitOMPOuterLoop( 1747 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 1748 CodeGenFunction::OMPPrivateScope &LoopScope, 1749 const CodeGenFunction::OMPLoopArguments &LoopArgs, 1750 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 1751 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { 1752 auto &RT = CGM.getOpenMPRuntime(); 1753 1754 const Expr *IVExpr = S.getIterationVariable(); 1755 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1756 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1757 1758 auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 1759 1760 // Start the loop with a block that tests the condition. 1761 auto CondBlock = createBasicBlock("omp.dispatch.cond"); 1762 EmitBlock(CondBlock); 1763 const SourceRange &R = S.getSourceRange(); 1764 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1765 SourceLocToDebugLoc(R.getEnd())); 1766 1767 llvm::Value *BoolCondVal = nullptr; 1768 if (!DynamicOrOrdered) { 1769 // UB = min(UB, GlobalUB) or 1770 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 1771 // 'distribute parallel for') 1772 EmitIgnoredExpr(LoopArgs.EUB); 1773 // IV = LB 1774 EmitIgnoredExpr(LoopArgs.Init); 1775 // IV < UB 1776 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); 1777 } else { 1778 BoolCondVal = 1779 RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL, 1780 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); 1781 } 1782 1783 // If there are any cleanups between here and the loop-exit scope, 1784 // create a block to stage a loop exit along. 1785 auto ExitBlock = LoopExit.getBlock(); 1786 if (LoopScope.requiresCleanups()) 1787 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 1788 1789 auto LoopBody = createBasicBlock("omp.dispatch.body"); 1790 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 1791 if (ExitBlock != LoopExit.getBlock()) { 1792 EmitBlock(ExitBlock); 1793 EmitBranchThroughCleanup(LoopExit); 1794 } 1795 EmitBlock(LoopBody); 1796 1797 // Emit "IV = LB" (in case of static schedule, we have already calculated new 1798 // LB for loop condition and emitted it above). 1799 if (DynamicOrOrdered) 1800 EmitIgnoredExpr(LoopArgs.Init); 1801 1802 // Create a block for the increment. 1803 auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 1804 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1805 1806 // Generate !llvm.loop.parallel metadata for loads and stores for loops 1807 // with dynamic/guided scheduling and without ordered clause. 
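// (Editorial sketch: LoopStack only records the decision here; the
// corresponding parallel-access metadata is attached to the loop's loads
// and stores later, letting the vectorizer ignore assumed loop-carried
// dependences. The exact metadata names are an implementation detail of
// CGLoopInfo.)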
1808 if (!isOpenMPSimdDirective(S.getDirectiveKind()))
1809 LoopStack.setParallel(!IsMonotonic);
1810 else
1811 EmitOMPSimdInit(S, IsMonotonic);
1812 
1813 SourceLocation Loc = S.getLocStart();
1814 
1815 // When 'distribute' is not combined with a 'for':
1816 // while (idx <= UB) { BODY; ++idx; }
1817 // When 'distribute' is combined with a 'for'
1818 // (e.g. 'distribute parallel for'):
1819 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
1820 EmitOMPInnerLoop(
1821 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
1822 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
1823 CodeGenLoop(CGF, S, LoopExit);
1824 },
1825 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
1826 CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
1827 });
1828 
1829 EmitBlock(Continue.getBlock());
1830 BreakContinueStack.pop_back();
1831 if (!DynamicOrOrdered) {
1832 // Emit "LB = LB + Stride", "UB = UB + Stride".
1833 EmitIgnoredExpr(LoopArgs.NextLB);
1834 EmitIgnoredExpr(LoopArgs.NextUB);
1835 }
1836 
1837 EmitBranch(CondBlock);
1838 LoopStack.pop();
1839 // Emit the fall-through block.
1840 EmitBlock(LoopExit.getBlock());
1841 
1842 // Tell the runtime we are done.
1843 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
1844 if (!DynamicOrOrdered)
1845 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
1846 S.getDirectiveKind());
1847 };
1848 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
1849 }
1850 
1851 void CodeGenFunction::EmitOMPForOuterLoop(
1852 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1853 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1854 const OMPLoopArguments &LoopArgs,
1855 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1856 auto &RT = CGM.getOpenMPRuntime();
1857 
1858 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1859 const bool DynamicOrOrdered =
1860 Ordered || RT.isDynamic(ScheduleKind.Schedule);
1861 
1862 assert((Ordered ||
1863 !RT.isStaticNonchunked(ScheduleKind.Schedule,
1864 LoopArgs.Chunk != nullptr)) &&
1865 "static non-chunked schedule does not need outer loop");
1866 
1867 // Emit outer loop.
1868 //
1869 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1870 // When schedule(dynamic,chunk_size) is specified, the iterations are
1871 // distributed to threads in the team in chunks as the threads request them.
1872 // Each thread executes a chunk of iterations, then requests another chunk,
1873 // until no chunks remain to be distributed. Each chunk contains chunk_size
1874 // iterations, except for the last chunk to be distributed, which may have
1875 // fewer iterations. When no chunk_size is specified, it defaults to 1.
1876 //
1877 // When schedule(guided,chunk_size) is specified, the iterations are assigned
1878 // to threads in the team in chunks as the executing threads request them.
1879 // Each thread executes a chunk of iterations, then requests another chunk,
1880 // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1881 // each chunk is proportional to the number of unassigned iterations divided
1882 // by the number of threads in the team, decreasing to 1. For a chunk_size
1883 // with value k (greater than 1), the size of each chunk is determined in the
1884 // same way, with the restriction that the chunks do not contain fewer than k
1885 // iterations (except for the last chunk to be assigned, which may have fewer
1886 // than k iterations).
1887 //
1888 // When schedule(auto) is specified, the decision regarding scheduling is
1889 // delegated to the compiler and/or runtime system. The programmer gives the
1890 // implementation the freedom to choose any possible mapping of iterations to
1891 // threads in the team.
1892 //
1893 // When schedule(runtime) is specified, the decision regarding scheduling is
1894 // deferred until run time, and the schedule and chunk size are taken from the
1895 // run-sched-var ICV. If the ICV is set to auto, the schedule is
1896 // implementation defined.
1897 //
1898 // while(__kmpc_dispatch_next(&LB, &UB)) {
1899 // idx = LB;
1900 // while (idx <= UB) { BODY; ++idx;
1901 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1902 // } // inner loop
1903 // }
1904 //
1905 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1906 // When schedule(static, chunk_size) is specified, iterations are divided into
1907 // chunks of size chunk_size, and the chunks are assigned to the threads in
1908 // the team in a round-robin fashion in the order of the thread number.
1909 //
1910 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1911 // while (idx <= UB) { BODY; ++idx; } // inner loop
1912 // LB = LB + ST;
1913 // UB = UB + ST;
1914 // }
1915 //
1916 
1917 const Expr *IVExpr = S.getIterationVariable();
1918 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1919 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1920 
1921 if (DynamicOrOrdered) {
1922 auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1923 llvm::Value *LBVal = DispatchBounds.first;
1924 llvm::Value *UBVal = DispatchBounds.second;
1925 CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
1926 LoopArgs.Chunk};
1927 RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1928 IVSigned, Ordered, DispatchRTInputValues);
1929 } else {
1930 CGOpenMPRuntime::StaticRTInput StaticInit(
1931 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1932 LoopArgs.ST, LoopArgs.Chunk);
1933 RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
1934 ScheduleKind, StaticInit);
1935 }
1936 
1937 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1938 const unsigned IVSize,
1939 const bool IVSigned) {
1940 if (Ordered) {
1941 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1942 IVSigned);
1943 }
1944 };
1945 
1946 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1947 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1948 OuterLoopArgs.IncExpr = S.getInc();
1949 OuterLoopArgs.Init = S.getInit();
1950 OuterLoopArgs.Cond = S.getCond();
1951 OuterLoopArgs.NextLB = S.getNextLowerBound();
1952 OuterLoopArgs.NextUB = S.getNextUpperBound();
1953 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1954 emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1955 }
1956 
1957 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
1958 const unsigned IVSize, const bool IVSigned) {}
1959 
1960 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1961 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1962 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1963 const CodeGenLoopTy &CodeGenLoopContent) {
1964 
1965 auto &RT = CGM.getOpenMPRuntime();
1966 
1967 // Emit outer loop.
1968 // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
1969 // dynamic.
1970 //
1971 
1972 const Expr *IVExpr = S.getIterationVariable();
1973 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1974 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1975 
1976 CGOpenMPRuntime::StaticRTInput StaticInit(
1977 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
1978 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
1979 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
1980 
1981 // For combined 'distribute' and 'for' directives, the increment expression
1982 // of distribute is stored in DistInc. For 'distribute' alone, it is in Inc.
1983 Expr *IncExpr;
1984 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1985 IncExpr = S.getDistInc();
1986 else
1987 IncExpr = S.getInc();
1988 
1989 // This routine is shared by 'omp distribute parallel for' and
1990 // 'omp distribute': select the right EUB expression depending on the
1991 // directive.
1992 OMPLoopArguments OuterLoopArgs;
1993 OuterLoopArgs.LB = LoopArgs.LB;
1994 OuterLoopArgs.UB = LoopArgs.UB;
1995 OuterLoopArgs.ST = LoopArgs.ST;
1996 OuterLoopArgs.IL = LoopArgs.IL;
1997 OuterLoopArgs.Chunk = LoopArgs.Chunk;
1998 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1999 ? S.getCombinedEnsureUpperBound()
2000 : S.getEnsureUpperBound();
2001 OuterLoopArgs.IncExpr = IncExpr;
2002 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2003 ? S.getCombinedInit()
2004 : S.getInit();
2005 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2006 ? S.getCombinedCond()
2007 : S.getCond();
2008 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2009 ? S.getCombinedNextLowerBound()
2010 : S.getNextLowerBound();
2011 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2012 ? S.getCombinedNextUpperBound()
2013 : S.getNextUpperBound();
2014 
2015 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2016 LoopScope, OuterLoopArgs, CodeGenLoopContent,
2017 emitEmptyOrdered);
2018 }
2019 
2020 static std::pair<LValue, LValue>
2021 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
2022 const OMPExecutableDirective &S) {
2023 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2024 LValue LB =
2025 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2026 LValue UB =
2027 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2028 
2029 // When composing 'distribute' with 'for' (e.g. as in 'distribute
2030 // parallel for') we need to use the 'distribute'
2031 // chunk lower and upper bounds rather than the whole loop iteration
2032 // space. These are parameters to the outlined function for 'parallel'
2033 // and we copy the bounds of the previous schedule into
2034 // the current ones.
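// In pseudo-code (illustrative only), for 'distribute parallel for':
//   distribute:   the team is assigned the chunk [PrevLB, PrevUB]
//   parallel for: LB = PrevLB; UB = PrevUB; // not 0..LastIteration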
2035 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
2036 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
2037 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
2038 PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
2039 PrevLBVal = CGF.EmitScalarConversion(
2040 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
2041 LS.getIterationVariable()->getType(),
2042 LS.getPrevLowerBoundVariable()->getExprLoc());
2043 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
2044 PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
2045 PrevUBVal = CGF.EmitScalarConversion(
2046 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
2047 LS.getIterationVariable()->getType(),
2048 LS.getPrevUpperBoundVariable()->getExprLoc());
2049 
2050 CGF.EmitStoreOfScalar(PrevLBVal, LB);
2051 CGF.EmitStoreOfScalar(PrevUBVal, UB);
2052 
2053 return {LB, UB};
2054 }
2055 
2056 /// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
2057 /// we need to use the LB and UB expressions generated by the worksharing
2058 /// code generation support, whereas in non-combined situations we would
2059 /// just emit 0 and the LastIteration expression.
2060 /// This function is necessary due to the difference of the LB and UB
2061 /// types for the RT emission routines for 'for_static_init' and
2062 /// 'for_dispatch_init'.
2063 static std::pair<llvm::Value *, llvm::Value *>
2064 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2065 const OMPExecutableDirective &S,
2066 Address LB, Address UB) {
2067 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2068 const Expr *IVExpr = LS.getIterationVariable();
2069 // When implementing a dynamic schedule for a 'for' combined with a
2070 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2071 // is not normalized, as each team only executes its own assigned
2072 // distribute chunk.
2073 QualType IteratorTy = IVExpr->getType();
2074 llvm::Value *LBVal =
2075 CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getLocStart());
2076 llvm::Value *UBVal =
2077 CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getLocStart());
2078 return {LBVal, UBVal};
2079 }
2080 
2081 static void emitDistributeParallelForDistributeInnerBoundParams(
2082 CodeGenFunction &CGF, const OMPExecutableDirective &S,
2083 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
2084 const auto &Dir = cast<OMPLoopDirective>(S);
2085 LValue LB =
2086 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
2087 auto LBCast = CGF.Builder.CreateIntCast(
2088 CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2089 CapturedVars.push_back(LBCast);
2090 LValue UB =
2091 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2092 
2093 auto UBCast = CGF.Builder.CreateIntCast(
2094 CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2095 CapturedVars.push_back(UBCast);
2096 }
2097 
2098 static void
2099 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2100 const OMPLoopDirective &S,
2101 CodeGenFunction::JumpDest LoopExit) {
2102 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2103 PrePostActionTy &) {
2104 bool HasCancel = false;
2105 if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2106 if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
2107 HasCancel = D->hasCancel();
2108 else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
2109 HasCancel = D->hasCancel();
2110 else if (const auto *D =
2111 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
2112 HasCancel = D->hasCancel();
2113 }
2114 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
2115 HasCancel);
2116 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2117 emitDistributeParallelForInnerBounds,
2118 emitDistributeParallelForDispatchBounds);
2119 };
2120 
2121 emitCommonOMPParallelDirective(
2122 CGF, S,
2123 isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
2124 CGInlinedWorksharingLoop,
2125 emitDistributeParallelForDistributeInnerBoundParams);
2126 }
2127 
2128 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2129 const OMPDistributeParallelForDirective &S) {
2130 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2131 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2132 S.getDistInc());
2133 };
2134 OMPLexicalScope Scope(*this, S, OMPD_parallel);
2135 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2136 }
2137 
2138 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2139 const OMPDistributeParallelForSimdDirective &S) {
2140 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2141 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2142 S.getDistInc());
2143 };
2144 OMPLexicalScope Scope(*this, S, OMPD_parallel);
2145 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2146 }
2147 
2148 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2149 const OMPDistributeSimdDirective &S) {
2150 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2151 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
2152 };
2153 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2154 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2155 }
2156 
2157 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
2158 CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
2159 // Emit SPMD target simd region as a standalone region.
2160 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2161 emitOMPSimdRegion(CGF, S, Action);
2162 };
2163 llvm::Function *Fn;
2164 llvm::Constant *Addr;
2165 // Emit target region as a standalone region.
2166 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
2167 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
2168 assert(Fn && Addr && "Target device function emission failed.");
2169 }
2170 
2171 void CodeGenFunction::EmitOMPTargetSimdDirective(
2172 const OMPTargetSimdDirective &S) {
2173 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2174 emitOMPSimdRegion(CGF, S, Action);
2175 };
2176 emitCommonOMPTargetDirective(*this, S, CodeGen);
2177 }
2178 
2179 namespace {
2180 struct ScheduleKindModifiersTy {
2181 OpenMPScheduleClauseKind Kind;
2182 OpenMPScheduleClauseModifier M1;
2183 OpenMPScheduleClauseModifier M2;
2184 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2185 OpenMPScheduleClauseModifier M1,
2186 OpenMPScheduleClauseModifier M2)
2187 : Kind(Kind), M1(M1), M2(M2) {}
2188 };
2189 } // namespace
2190 
2191 bool CodeGenFunction::EmitOMPWorksharingLoop(
2192 const OMPLoopDirective &S, Expr *EUB,
2193 const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2194 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2195 // Emit the loop iteration variable.
2196 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2197 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2198 EmitVarDecl(*IVDecl); 2199 2200 // Emit the iterations count variable. 2201 // If it is not a variable, Sema decided to calculate iterations count on each 2202 // iteration (e.g., it is foldable into a constant). 2203 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2204 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2205 // Emit calculation of the iterations count. 2206 EmitIgnoredExpr(S.getCalcLastIteration()); 2207 } 2208 2209 auto &RT = CGM.getOpenMPRuntime(); 2210 2211 bool HasLastprivateClause; 2212 // Check pre-condition. 2213 { 2214 OMPLoopScope PreInitScope(*this, S); 2215 // Skip the entire loop if we don't meet the precondition. 2216 // If the condition constant folds and can be elided, avoid emitting the 2217 // whole loop. 2218 bool CondConstant; 2219 llvm::BasicBlock *ContBlock = nullptr; 2220 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2221 if (!CondConstant) 2222 return false; 2223 } else { 2224 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2225 ContBlock = createBasicBlock("omp.precond.end"); 2226 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2227 getProfileCount(&S)); 2228 EmitBlock(ThenBlock); 2229 incrementProfileCounter(&S); 2230 } 2231 2232 bool Ordered = false; 2233 if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 2234 if (OrderedClause->getNumForLoops()) 2235 RT.emitDoacrossInit(*this, S); 2236 else 2237 Ordered = true; 2238 } 2239 2240 llvm::DenseSet<const Expr *> EmittedFinals; 2241 emitAlignedClause(*this, S); 2242 bool HasLinears = EmitOMPLinearClauseInit(S); 2243 // Emit helper vars inits. 2244 2245 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); 2246 LValue LB = Bounds.first; 2247 LValue UB = Bounds.second; 2248 LValue ST = 2249 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2250 LValue IL = 2251 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2252 2253 // Emit 'then' code. 2254 { 2255 OMPPrivateScope LoopScope(*this); 2256 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) { 2257 // Emit implicit barrier to synchronize threads and avoid data races on 2258 // initialization of firstprivate variables and post-update of 2259 // lastprivate variables. 2260 CGM.getOpenMPRuntime().emitBarrierCall( 2261 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2262 /*ForceSimpleCall=*/true); 2263 } 2264 EmitOMPPrivateClause(S, LoopScope); 2265 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2266 EmitOMPReductionClauseInit(S, LoopScope); 2267 EmitOMPPrivateLoopCounters(S, LoopScope); 2268 EmitOMPLinearClause(S, LoopScope); 2269 (void)LoopScope.Privatize(); 2270 2271 // Detect the loop schedule kind and chunk. 
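// For example (editorial): with '#pragma omp for schedule(dynamic, 4)' the
// schedule kind detected below is OMPC_SCHEDULE_dynamic and Chunk is the
// emitted value 4, converted to the type of the iteration variable.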
2272 llvm::Value *Chunk = nullptr; 2273 OpenMPScheduleTy ScheduleKind; 2274 if (auto *C = S.getSingleClause<OMPScheduleClause>()) { 2275 ScheduleKind.Schedule = C->getScheduleKind(); 2276 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2277 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2278 if (const auto *Ch = C->getChunkSize()) { 2279 Chunk = EmitScalarExpr(Ch); 2280 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2281 S.getIterationVariable()->getType(), 2282 S.getLocStart()); 2283 } 2284 } 2285 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2286 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2287 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2288 // If the static schedule kind is specified or if the ordered clause is 2289 // specified, and if no monotonic modifier is specified, the effect will 2290 // be as if the monotonic modifier was specified. 2291 if (RT.isStaticNonchunked(ScheduleKind.Schedule, 2292 /* Chunked */ Chunk != nullptr) && 2293 !Ordered) { 2294 if (isOpenMPSimdDirective(S.getDirectiveKind())) 2295 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2296 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2297 // When no chunk_size is specified, the iteration space is divided into 2298 // chunks that are approximately equal in size, and at most one chunk is 2299 // distributed to each thread. Note that the size of the chunks is 2300 // unspecified in this case. 2301 CGOpenMPRuntime::StaticRTInput StaticInit( 2302 IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(), 2303 UB.getAddress(), ST.getAddress()); 2304 RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(), 2305 ScheduleKind, StaticInit); 2306 auto LoopExit = 2307 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2308 // UB = min(UB, GlobalUB); 2309 EmitIgnoredExpr(S.getEnsureUpperBound()); 2310 // IV = LB; 2311 EmitIgnoredExpr(S.getInit()); 2312 // while (idx <= UB) { BODY; ++idx; } 2313 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 2314 S.getInc(), 2315 [&S, LoopExit](CodeGenFunction &CGF) { 2316 CGF.EmitOMPLoopBody(S, LoopExit); 2317 CGF.EmitStopPoint(&S); 2318 }, 2319 [](CodeGenFunction &) {}); 2320 EmitBlock(LoopExit.getBlock()); 2321 // Tell the runtime we are done. 2322 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2323 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), 2324 S.getDirectiveKind()); 2325 }; 2326 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2327 } else { 2328 const bool IsMonotonic = 2329 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || 2330 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || 2331 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 2332 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 2333 // Emit the outer loop, which requests its work chunk [LB..UB] from 2334 // runtime and runs the inner loop to process it. 2335 const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), 2336 ST.getAddress(), IL.getAddress(), 2337 Chunk, EUB); 2338 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 2339 LoopArguments, CGDispatchBounds); 2340 } 2341 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2342 EmitOMPSimdFinal(S, 2343 [&](CodeGenFunction &CGF) -> llvm::Value * { 2344 return CGF.Builder.CreateIsNotNull( 2345 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2346 }); 2347 } 2348 EmitOMPReductionClauseFinal( 2349 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 2350 ? 
/*Parallel and Simd*/ OMPD_parallel_for_simd 2351 : /*Parallel only*/ OMPD_parallel); 2352 // Emit post-update of the reduction variables if IsLastIter != 0. 2353 emitPostUpdateForReductionClause( 2354 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2355 return CGF.Builder.CreateIsNotNull( 2356 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2357 }); 2358 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2359 if (HasLastprivateClause) 2360 EmitOMPLastprivateClauseFinal( 2361 S, isOpenMPSimdDirective(S.getDirectiveKind()), 2362 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 2363 } 2364 EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2365 return CGF.Builder.CreateIsNotNull( 2366 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2367 }); 2368 // We're now done with the loop, so jump to the continuation block. 2369 if (ContBlock) { 2370 EmitBranch(ContBlock); 2371 EmitBlock(ContBlock, true); 2372 } 2373 } 2374 return HasLastprivateClause; 2375 } 2376 2377 /// The following two functions generate expressions for the loop lower 2378 /// and upper bounds in case of static and dynamic (dispatch) schedule 2379 /// of the associated 'for' or 'distribute' loop. 2380 static std::pair<LValue, LValue> 2381 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 2382 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2383 LValue LB = 2384 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2385 LValue UB = 2386 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2387 return {LB, UB}; 2388 } 2389 2390 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not 2391 /// consider the lower and upper bound expressions generated by the 2392 /// worksharing loop support, but we use 0 and the iteration space size as 2393 /// constants 2394 static std::pair<llvm::Value *, llvm::Value *> 2395 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, 2396 Address LB, Address UB) { 2397 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2398 const Expr *IVExpr = LS.getIterationVariable(); 2399 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); 2400 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); 2401 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); 2402 return {LBVal, UBVal}; 2403 } 2404 2405 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 2406 bool HasLastprivates = false; 2407 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2408 PrePostActionTy &) { 2409 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 2410 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2411 emitForLoopBounds, 2412 emitDispatchForLoopBounds); 2413 }; 2414 { 2415 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2416 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 2417 S.hasCancel()); 2418 } 2419 2420 // Emit an implicit barrier at the end. 
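// (Editorial note: per the condition below, the barrier is also emitted
// when 'nowait' is present but lastprivates were generated, so their final
// values are complete before other threads read the original variables.)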
2421 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2422 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2423 } 2424 } 2425 2426 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 2427 bool HasLastprivates = false; 2428 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2429 PrePostActionTy &) { 2430 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2431 emitForLoopBounds, 2432 emitDispatchForLoopBounds); 2433 }; 2434 { 2435 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2436 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2437 } 2438 2439 // Emit an implicit barrier at the end. 2440 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2441 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2442 } 2443 } 2444 2445 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 2446 const Twine &Name, 2447 llvm::Value *Init = nullptr) { 2448 auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 2449 if (Init) 2450 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); 2451 return LVal; 2452 } 2453 2454 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 2455 const Stmt *Stmt = S.getInnermostCapturedStmt()->getCapturedStmt(); 2456 const auto *CS = dyn_cast<CompoundStmt>(Stmt); 2457 bool HasLastprivates = false; 2458 auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF, 2459 PrePostActionTy &) { 2460 auto &C = CGF.CGM.getContext(); 2461 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2462 // Emit helper vars inits. 2463 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 2464 CGF.Builder.getInt32(0)); 2465 auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1) 2466 : CGF.Builder.getInt32(0); 2467 LValue UB = 2468 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 2469 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 2470 CGF.Builder.getInt32(1)); 2471 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 2472 CGF.Builder.getInt32(0)); 2473 // Loop counter. 2474 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 2475 OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2476 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 2477 OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2478 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 2479 // Generate condition for loop. 2480 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 2481 OK_Ordinary, S.getLocStart(), FPOptions()); 2482 // Increment for loop counter. 2483 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 2484 S.getLocStart(), true); 2485 auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) { 2486 // Iterate through all sections and emit a switch construct: 2487 // switch (IV) { 2488 // case 0: 2489 // <SectionStmt[0]>; 2490 // break; 2491 // ... 2492 // case <NumSection> - 1: 2493 // <SectionStmt[<NumSection> - 1]>; 2494 // break; 2495 // } 2496 // .omp.sections.exit: 2497 auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 2498 auto *SwitchStmt = 2499 CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getLocStart()), 2500 ExitBB, CS == nullptr ? 
1 : CS->size()); 2501 if (CS) { 2502 unsigned CaseNumber = 0; 2503 for (auto *SubStmt : CS->children()) { 2504 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2505 CGF.EmitBlock(CaseBB); 2506 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 2507 CGF.EmitStmt(SubStmt); 2508 CGF.EmitBranch(ExitBB); 2509 ++CaseNumber; 2510 } 2511 } else { 2512 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2513 CGF.EmitBlock(CaseBB); 2514 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 2515 CGF.EmitStmt(Stmt); 2516 CGF.EmitBranch(ExitBB); 2517 } 2518 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 2519 }; 2520 2521 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2522 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 2523 // Emit implicit barrier to synchronize threads and avoid data races on 2524 // initialization of firstprivate variables and post-update of lastprivate 2525 // variables. 2526 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 2527 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2528 /*ForceSimpleCall=*/true); 2529 } 2530 CGF.EmitOMPPrivateClause(S, LoopScope); 2531 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2532 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2533 (void)LoopScope.Privatize(); 2534 2535 // Emit static non-chunked loop. 2536 OpenMPScheduleTy ScheduleKind; 2537 ScheduleKind.Schedule = OMPC_SCHEDULE_static; 2538 CGOpenMPRuntime::StaticRTInput StaticInit( 2539 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), 2540 LB.getAddress(), UB.getAddress(), ST.getAddress()); 2541 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 2542 CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit); 2543 // UB = min(UB, GlobalUB); 2544 auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); 2545 auto *MinUBGlobalUB = CGF.Builder.CreateSelect( 2546 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 2547 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 2548 // IV = LB; 2549 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); 2550 // while (idx <= UB) { BODY; ++idx; } 2551 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, 2552 [](CodeGenFunction &) {}); 2553 // Tell the runtime we are done. 2554 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2555 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), 2556 S.getDirectiveKind()); 2557 }; 2558 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); 2559 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 2560 // Emit post-update of the reduction variables if IsLastIter != 0. 2561 emitPostUpdateForReductionClause( 2562 CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2563 return CGF.Builder.CreateIsNotNull( 2564 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2565 }); 2566 2567 // Emit final copy of the lastprivate variables if IsLastIter != 0. 
2568 if (HasLastprivates)
2569 CGF.EmitOMPLastprivateClauseFinal(
2570 S, /*NoFinals=*/false,
2571 CGF.Builder.CreateIsNotNull(
2572 CGF.EmitLoadOfScalar(IL, S.getLocStart())));
2573 };
2574 
2575 bool HasCancel = false;
2576 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
2577 HasCancel = OSD->hasCancel();
2578 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
2579 HasCancel = OPSD->hasCancel();
2580 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
2581 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
2582 HasCancel);
2583 // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
2584 // clause. Otherwise the barrier will be generated by the codegen for the
2585 // directive.
2586 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
2587 // Emit implicit barrier to synchronize threads and avoid data races on
2588 // the final copy-back of the lastprivate variables.
2589 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2590 OMPD_unknown);
2591 }
2592 }
2593 
2594 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2595 {
2596 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2597 EmitSections(S);
2598 }
2599 // Emit an implicit barrier at the end.
2600 if (!S.getSingleClause<OMPNowaitClause>()) {
2601 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2602 OMPD_sections);
2603 }
2604 }
2605 
2606 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2607 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2608 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2609 };
2610 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2611 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2612 S.hasCancel());
2613 }
2614 
2615 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2616 llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2617 llvm::SmallVector<const Expr *, 8> DestExprs;
2618 llvm::SmallVector<const Expr *, 8> SrcExprs;
2619 llvm::SmallVector<const Expr *, 8> AssignmentOps;
2620 // Check if there are any 'copyprivate' clauses associated with this
2621 // 'single' construct.
2622 // Build a list of copyprivate variables along with helper expressions 2623 // (<source>, <destination>, <destination>=<source> expressions) 2624 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 2625 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 2626 DestExprs.append(C->destination_exprs().begin(), 2627 C->destination_exprs().end()); 2628 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 2629 AssignmentOps.append(C->assignment_ops().begin(), 2630 C->assignment_ops().end()); 2631 } 2632 // Emit code for 'single' region along with 'copyprivate' clauses 2633 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2634 Action.Enter(CGF); 2635 OMPPrivateScope SingleScope(CGF); 2636 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 2637 CGF.EmitOMPPrivateClause(S, SingleScope); 2638 (void)SingleScope.Privatize(); 2639 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 2640 }; 2641 { 2642 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2643 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 2644 CopyprivateVars, DestExprs, 2645 SrcExprs, AssignmentOps); 2646 } 2647 // Emit an implicit barrier at the end (to avoid data race on firstprivate 2648 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 2649 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 2650 CGM.getOpenMPRuntime().emitBarrierCall( 2651 *this, S.getLocStart(), 2652 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); 2653 } 2654 } 2655 2656 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 2657 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2658 Action.Enter(CGF); 2659 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 2660 }; 2661 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2662 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 2663 } 2664 2665 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 2666 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2667 Action.Enter(CGF); 2668 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 2669 }; 2670 Expr *Hint = nullptr; 2671 if (auto *HintClause = S.getSingleClause<OMPHintClause>()) 2672 Hint = HintClause->getHint(); 2673 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2674 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 2675 S.getDirectiveName().getAsString(), 2676 CodeGen, S.getLocStart(), Hint); 2677 } 2678 2679 void CodeGenFunction::EmitOMPParallelForDirective( 2680 const OMPParallelForDirective &S) { 2681 // Emit directive as a combined directive that consists of two implicit 2682 // directives: 'parallel' with 'for' directive. 2683 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2684 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); 2685 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2686 emitDispatchForLoopBounds); 2687 }; 2688 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, 2689 emitEmptyBoundParameters); 2690 } 2691 2692 void CodeGenFunction::EmitOMPParallelForSimdDirective( 2693 const OMPParallelForSimdDirective &S) { 2694 // Emit directive as a combined directive that consists of two implicit 2695 // directives: 'parallel' with 'for' directive. 
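// Illustrative shape of the lowering (editorial sketch, not emitted
// verbatim):
//   __kmpc_fork_call(&loc, ..., outlined_fn);
// where outlined_fn runs the worksharing 'for' loop with simd annotations
// applied to its body.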
2696 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2697 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2698 emitDispatchForLoopBounds); 2699 }; 2700 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, 2701 emitEmptyBoundParameters); 2702 } 2703 2704 void CodeGenFunction::EmitOMPParallelSectionsDirective( 2705 const OMPParallelSectionsDirective &S) { 2706 // Emit directive as a combined directive that consists of two implicit 2707 // directives: 'parallel' with 'sections' directive. 2708 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2709 CGF.EmitSections(S); 2710 }; 2711 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, 2712 emitEmptyBoundParameters); 2713 } 2714 2715 void CodeGenFunction::EmitOMPTaskBasedDirective( 2716 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, 2717 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, 2718 OMPTaskDataTy &Data) { 2719 // Emit outlined function for task construct. 2720 const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion); 2721 auto *I = CS->getCapturedDecl()->param_begin(); 2722 auto *PartId = std::next(I); 2723 auto *TaskT = std::next(I, 4); 2724 // Check if the task is final 2725 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 2726 // If the condition constant folds and can be elided, try to avoid emitting 2727 // the condition and the dead arm of the if/else. 2728 auto *Cond = Clause->getCondition(); 2729 bool CondConstant; 2730 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 2731 Data.Final.setInt(CondConstant); 2732 else 2733 Data.Final.setPointer(EvaluateExprAsBool(Cond)); 2734 } else { 2735 // By default the task is not final. 2736 Data.Final.setInt(/*IntVal=*/false); 2737 } 2738 // Check if the task has 'priority' clause. 2739 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 2740 auto *Prio = Clause->getPriority(); 2741 Data.Priority.setInt(/*IntVal=*/true); 2742 Data.Priority.setPointer(EmitScalarConversion( 2743 EmitScalarExpr(Prio), Prio->getType(), 2744 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 2745 Prio->getExprLoc())); 2746 } 2747 // The first function argument for tasks is a thread id, the second one is a 2748 // part id (0 for tied tasks, >=0 for untied task). 2749 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 2750 // Get list of private variables. 2751 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 2752 auto IRef = C->varlist_begin(); 2753 for (auto *IInit : C->private_copies()) { 2754 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2755 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2756 Data.PrivateVars.push_back(*IRef); 2757 Data.PrivateCopies.push_back(IInit); 2758 } 2759 ++IRef; 2760 } 2761 } 2762 EmittedAsPrivate.clear(); 2763 // Get list of firstprivate variables. 2764 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 2765 auto IRef = C->varlist_begin(); 2766 auto IElemInitRef = C->inits().begin(); 2767 for (auto *IInit : C->private_copies()) { 2768 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2769 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2770 Data.FirstprivateVars.push_back(*IRef); 2771 Data.FirstprivateCopies.push_back(IInit); 2772 Data.FirstprivateInits.push_back(*IElemInitRef); 2773 } 2774 ++IRef; 2775 ++IElemInitRef; 2776 } 2777 } 2778 // Get list of lastprivate variables (for taskloops). 
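// (Editorial note: the destination/origin pairs collected below are used
// later in the task body to redirect the final lastprivate copies from the
// task-private copies back to the original variables.)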
2779 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2780 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2781 auto IRef = C->varlist_begin();
2782 auto ID = C->destination_exprs().begin();
2783 for (auto *IInit : C->private_copies()) {
2784 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2785 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2786 Data.LastprivateVars.push_back(*IRef);
2787 Data.LastprivateCopies.push_back(IInit);
2788 }
2789 LastprivateDstsOrigs.insert(
2790 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2791 cast<DeclRefExpr>(*IRef)});
2792 ++IRef;
2793 ++ID;
2794 }
2795 }
2796 SmallVector<const Expr *, 4> LHSs;
2797 SmallVector<const Expr *, 4> RHSs;
2798 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
2799 auto IPriv = C->privates().begin();
2800 auto IRed = C->reduction_ops().begin();
2801 auto ILHS = C->lhs_exprs().begin();
2802 auto IRHS = C->rhs_exprs().begin();
2803 for (const auto *Ref : C->varlists()) {
2804 Data.ReductionVars.emplace_back(Ref);
2805 Data.ReductionCopies.emplace_back(*IPriv);
2806 Data.ReductionOps.emplace_back(*IRed);
2807 LHSs.emplace_back(*ILHS);
2808 RHSs.emplace_back(*IRHS);
2809 std::advance(IPriv, 1);
2810 std::advance(IRed, 1);
2811 std::advance(ILHS, 1);
2812 std::advance(IRHS, 1);
2813 }
2814 }
2815 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
2816 *this, S.getLocStart(), LHSs, RHSs, Data);
2817 // Build list of dependences.
2818 for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2819 for (auto *IRef : C->varlists())
2820 Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
2821 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
2822 CapturedRegion](CodeGenFunction &CGF,
2823 PrePostActionTy &Action) {
2824 // Set proper addresses for generated private copies.
2825 OMPPrivateScope Scope(CGF);
2826 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
2827 !Data.LastprivateVars.empty()) {
2828 enum { PrivatesParam = 2, CopyFnParam = 3 };
2829 auto *CopyFn = CGF.Builder.CreateLoad(
2830 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
2831 auto *PrivatesPtr = CGF.Builder.CreateLoad(
2832 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(PrivatesParam)));
2833 // Map privates.
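// The runtime call built below has the shape (editorial sketch):
//   copy_fn(privates, &.priv.ptr.addr, &.firstpriv.ptr.addr, ...);
// i.e. the task's generated copy function fills each out-pointer with the
// address of the matching private copy inside the task's privates record.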
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (auto *E : Data.PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.LastprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
                                                          CopyFn, CallArgs);
      for (auto &&Pair : LastprivateDstsOrigs) {
        auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    if (Data.Reductions) {
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
                             Data.ReductionOps);
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement =
            Address(CGF.EmitScalarConversion(
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                        CGF.getContext().getPointerType(
                            Data.ReductionCopies[Cnt]->getType()),
                        Data.ReductionCopies[Cnt]->getExprLoc()),
                    Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                         [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
                                                           RedCG, Cnt);
      }
    }
    // Privatize all private variables except for in_reduction items.
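    // For illustration only (hypothetical user code): in_reduction items,
    // handled below, come from constructs such as
    //   #pragma omp taskgroup task_reduction(+ : r)
    //   {
    //   #pragma omp task in_reduction(+ : r)
    //     r += compute();
    //   }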
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const auto *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate
        // and privatized already during processing of the firstprivates.
        llvm::Value *ReductionsPtr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
                                 TaskgroupDescriptors[Cnt]->getExprLoc());
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                InRedPrivs[Cnt]->getExprLoc()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
                              [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
                                                           RedCG, Cnt);
      }
    }
    (void)InRedScope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}

static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                           ImplicitParamDecl::Other);
  auto *OrigRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                              ImplicitParamDecl::Other);
  auto *PrivateRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  QualType ElemType = C.getBaseElementType(Ty);
  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
                                           ImplicitParamDecl::Other);
  auto *InitRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
  PrivateVD->setInitStyle(VarDecl::CInit);
  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
                                              InitRef, /*BasePath=*/nullptr,
                                              VK_RValue));
  Data.FirstprivateVars.emplace_back(OrigRef);
  Data.FirstprivateCopies.emplace_back(PrivateRef);
  Data.FirstprivateInits.emplace_back(InitRef);
  return OrigVD;
}

void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
    OMPTargetDataInfo &InputInfo) {
  // Emit outlined function for task construct.
  auto CS = S.getCapturedStmt(OMPD_task);
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  auto *TaskT = std::next(I, 4);
  OMPTaskDataTy Data;
  // The task is not final.
  Data.Final.setInt(/*IntVal=*/false);
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      Data.FirstprivateVars.push_back(*IRef);
      Data.FirstprivateCopies.push_back(IInit);
      Data.FirstprivateInits.push_back(*IElemInitRef);
      ++IRef;
      ++IElemInitRef;
    }
  }
  OMPPrivateScope TargetScope(*this);
  VarDecl *BPVD = nullptr;
  VarDecl *PVD = nullptr;
  VarDecl *SVD = nullptr;
  if (InputInfo.NumberOfTargetItems > 0) {
    auto *CD = CapturedDecl::Create(
        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
    QualType BaseAndPointersType = getContext().getConstantArrayType(
        getContext().VoidPtrTy, ArrSize, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    BPVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointersType, CD, S.getLocStart());
    PVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointersType, CD, S.getLocStart());
    QualType SizesType = getContext().getConstantArrayType(
        getContext().getSizeType(), ArrSize, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
                                            S.getLocStart());
    TargetScope.addPrivate(
        BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
    TargetScope.addPrivate(PVD,
                           [&InputInfo]() { return InputInfo.PointersArray; });
    TargetScope.addPrivate(SVD,
                           [&InputInfo]() { return InputInfo.SizesArray; });
  }
  (void)TargetScope.Privatize();
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (auto *IRef : C->varlists())
      Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.FirstprivateVars.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      auto *CopyFn = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(CopyFnParam)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (auto *E : Data.FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
                                                          CopyFn, CallArgs);
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    if (InputInfo.NumberOfTargetItems > 0) {
      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize());
      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize());
      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize());
    }

    Action.Enter(CGF);
    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
    BodyGen(CGF);
  };
  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
      Data.NumberOfParts);
  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
  IntegerLiteral IfCond(getContext(), TrueOrFalse,
                        getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
                        SourceLocation());

  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), S, OutlinedFn,
                                      SharedsTy, CapturedStruct, &IfCond, Data);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
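  // A task is tied unless the 'untied' clause is present; e.g. (illustrative
  // user code):
  //   #pragma omp task untied
  //   { ... }  // may be resumed by a different thread after a task
  //            // scheduling point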
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        auto IPriv = C->privates().begin();
        auto IRed = C->reduction_ops().begin();
        auto ILHS = C->lhs_exprs().begin();
        auto IRHS = C->rhs_exprs().begin();
        for (const auto *Ref : C->varlists()) {
          Data.ReductionVars.emplace_back(Ref);
          Data.ReductionCopies.emplace_back(*IPriv);
          Data.ReductionOps.emplace_back(*IRed);
          LHSs.emplace_back(*ILHS);
          RHSs.emplace_back(*IRHS);
          std::advance(IPriv, 1);
          std::advance(IRed, 1);
          std::advance(ILHS, 1);
          std::advance(IRHS, 1);
        }
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(
              CGF, S.getLocStart(), LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
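  // For example (illustration only): with a literal bound such as
  // 'for (int i = 0; i < 10; ++i)' the count may fold to the constant 10, in
  // which case Sema does not materialize a count variable and there is
  // nothing to emit here.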
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    // Emit 'then' code.
    {
      // Emit helper vars inits.

      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind()))
        EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number.
      // When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /*Chunked=*/Chunk != nullptr)) {
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /*Ordered=*/false, IL.getAddress(),
            LB.getAddress(), UB.getAddress(), ST.getAddress());
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                                    StaticInit);
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                         ? S.getCombinedCond()
                         : S.getCond();

        // For 'distribute' alone, codegen
        //   while (idx <= UB) { BODY; ++idx; }
        // When combined with 'for' (e.g. as in 'distribute parallel for'),
        //   while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
                         [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                           CodeGenLoop(CGF, S, LoopExit);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from the
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });
      }
      OpenMPDirectiveKind ReductionKind = OMPD_unknown;
      if (isOpenMPParallelDirective(S.getDirectiveKind()) &&
          isOpenMPSimdDirective(S.getDirectiveKind())) {
        ReductionKind = OMPD_parallel_for_simd;
      } else if (isOpenMPParallelDirective(S.getDirectiveKind())) {
        ReductionKind = OMPD_parallel_for;
      } else if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        ReductionKind = OMPD_simd;
      } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) &&
                 S.hasClausesOfKind<OMPReductionClause>()) {
        llvm_unreachable(
            "No reduction clauses are allowed in distribute directive.");
      }
      EmitOMPReductionClauseFinal(S, ReductionKind);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
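      // For illustration only (hypothetical user code):
      //   #pragma omp distribute lastprivate(x)
      //   for (int i = 0; i < n; ++i) x = f(i);
      // Only the team that executed the last iteration (IsLastIter != 0)
      // copies its private 'x' back to the original variable.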
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}

void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
  Fn->addFnAttr(llvm::Attribute::NoInline);
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.getAssociatedStmt() &&
           "No associated statement must be in ordered depend construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
}

static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
                                        Loc)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                              DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  } else {
    CGF.EmitAtomicStore(RVal, LVal,
                        IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                 : llvm::AtomicOrdering::Monotonic,
                        LVal.isVolatile(), /*IsInit=*/false);
  }
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(
                         XLValue, Loc,
                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                  : llvm::AtomicOrdering::Monotonic,
                         XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
}

static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
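  // For illustration only: '#pragma omp atomic write seq_cst' therefore emits
  // the atomic store followed by a runtime flush of all thread-visible data
  // (a flush without a list), via the emitFlush call below.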
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomics are supported for the given
  // type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  //   x binop= expr;     -> xrval binop expr;
  //   x++, ++x           -> xrval + 1;
  //   x--, --x           -> xrval - 1;
  //   x = x binop expr;  -> xrval binop expr;
  //   x = expr binop x;  -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  //   x binop= expr;     -> xrval binop expr;
  //   x++, ++x           -> xrval + 1;
  //   x--, --x           -> xrval - 1;
  //   x = x binop expr;  -> xrval binop expr;
  //   x = expr binop x;  -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
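  // 'Gen' computes the updated value: the opaque operands of the update
  // expression are remapped so that 'x' refers to its freshly loaded old
  // value and 'expr' to the precomputed ExprRValue.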
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    //   x binop= expr;     -> xrval binop expr;
    //   x++, ++x           -> xrval + 1;
    //   x--, --x           -> xrval - 1;
    //   x = x binop expr;  -> xrval binop expr;
    //   x = expr binop x;  -> expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide the new value, so evaluate it using the
        // old value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find the first clause (skip the seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const auto *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
    enterFullExpression(EWC);
  }
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const auto *C : Compound->body()) {
      if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
        enterFullExpression(EWC);
      }
    }
  }

  auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.EmitStopPoint(CS);
    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
                      S.getV(), S.getExpr(), S.getUpdateExpr(),
                      S.isXLHSInRHSPart(), S.getLocStart());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the at most one 'if' clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
    Device = C->getDevice();
  }

  // Check if we have an 'if' clause whose conditional always evaluates to
  // false, or if we do not have any targets specified. If so, the target
  // region is not an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device);
}

static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();

  Action.Enter(CGF);
  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
  const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getLocStart());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
                                           CapturedVars);
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
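  // For illustration only (hypothetical user code):
  //   #pragma omp teams num_teams(4) reduction(+ : sum)
  //   { ... }
  // The body is outlined and launched once per team; clause-introduced
  // privates and reductions are set up inside the outlined region below.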
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

static void emitTargetTeamsRegion(CodeGenFunction &CGF,
                                  PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    Action.Enter(CGF);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(
      CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
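  // For illustration only (hypothetical user code):
  //   #pragma omp teams distribute parallel for
  //   for (int i = 0; i < n; ++i) ...
  // The distribute loop below hands [LB..UB] chunks to the inner
  // 'parallel for' via the combined bounds (see
  // emitInnerParallelForWhenCombined).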
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  // Emit SPMD target teams distribute parallel for region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
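  // (Descriptive note: this entry point runs during device code generation;
  // the outlined function produced below is what the offloading runtime
  // launches, so it must be emitted even though nothing in this translation
  // unit calls it directly.)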
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  // Emit SPMD target teams distribute parallel for simd region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
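  // (For reference, an illustrative form of the construct lowered here:
  //   #pragma omp target teams distribute parallel for simd
  //   for (int i = 0; i < N; ++i)
  //     a[i] = b[i] + c[i];
  // )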
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}

void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (auto PvtVarIt : C.private_copies()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library.
      // We have to cast the source address because it is always a void *.
      // References are materialized in the privatization scope, so the
      // initialization here disregards the fact that the original variable is
      // a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it will be initialized by the
      // declaration we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable has served its purpose in the emission of
      // the previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}

// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code
  // generation to deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers
      // if needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else
        RCG(CGF);
    };

    // Forward the provided action to the privatization codegen.
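    // (Descriptive note: whether privatization actually happens is decided
    // when the runtime emits the region; the pre/post action installed below
    // via setAction may be replaced by the OpenMP runtime code generation,
    // which deactivates the device-pointer privatization. For reference, an
    // illustrative construct that exercises this path:
    //   #pragma omp target data map(to : A[0:N]) use_device_ptr(A)
    //   { /* here A evaluates to the device address of the mapping */ }
    // )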
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inline scope: changes to the references inside the region
    // are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
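  // (For reference, an illustrative combined construct handled here:
  //   #pragma omp target parallel reduction(+ : sum)
  //   { sum += work(); }
  // The 'parallel' part of the combined directive owns the captured statement
  // fetched below.)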
  auto *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(
      CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
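  // (Descriptive note: "SPMD" here names the execution scheme in which every
  // device thread runs the region body directly; whether the SPMD or the
  // generic master/worker scheme is actually used is decided by the
  // target-specific OpenMP runtime codegen, not in this file.)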
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

/// Map a loop helper variable to the address of the corresponding implicit
/// parameter of the outlined task function.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(
      VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
}

void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
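  // (Descriptive note: a "tied" task must be resumed by the thread that first
  // started executing it, while an "untied" task may be resumed by any thread
  // in the team at a task scheduling point; taskloop tasks are currently
  // always emitted tied, as set below.)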
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
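    // (Descriptive note: lastprivate semantics give the original variables
    // the values from the sequentially last iteration; the runtime stores a
    // nonzero flag through the last-iteration parameter (*LIP) for whichever
    // task executed it, and that flag is what the load below tests.)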
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getLocStart());
  }
}

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const auto *E : LD->counters()) {
          if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                  cast<DeclRefExpr>(E)->getDecl())) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
      }
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  OMPSimdLexicalScope Scope(*this, D);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this,
      isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                  : D.getDirectiveKind(),
      CodeGen);
}
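// (Descriptive note, stated as an assumption: EmitSimpleOMPExecutableDirective
// above appears to be the reduced emission path used when full OpenMP runtime
// support is not wanted, e.g. in simd-only compilation: simd directives keep
// their simd lowering, while any other directive simply has its captured body
// emitted inline.)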