//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/DeclOpenMP.h"
#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  /// Emit the pre-init declarations attached to the directive's clauses
  /// (helper variables created by Sema for captured clause expressions), so
  /// they are in scope before the directive body is emitted.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              // Declarations marked OMPCaptureNoInit get storage and cleanups
              // but deliberately no initializer emission.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  // Private scope used to remap captured variables when the construct is
  // emitted inline (AsInlined == true in the constructor).
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  // True if \p VD is already captured by the surrounding lambda, captured
  // statement, or block, i.e. a DeclRefExpr to it must be marked as referring
  // to an enclosing capture.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                  bool AsInlined = false, bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (AsInlined) {
      if (S.hasAssociatedStmt()) {
        auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
        for (auto &C : CS->captures()) {
          if (C.capturesVariable() || C.capturesVariableByCopy()) {
            auto *VD = C.getCapturedVar();
            // Remap each captured variable to the address it has at this
            // point, so the inlined body refers to the shared copy.
            DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                            isCapturedVar(CGF, VD) ||
                                (CGF.CapturedStmtInfo &&
                                 InlinedShareds.isGlobalVarCaptured(VD)),
                            VD->getType().getNonReferenceType(), VK_LValue,
                            SourceLocation());
            InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
              return CGF.EmitLValue(&DRE).getAddress();
            });
          }
        }
        (void)InlinedShareds.Privatize();
      }
    }
  }
};

/// Lexical scope for OpenMP parallel construct, that handles correct codegen
/// for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  // Pre-init statements are emitted by this scope only for stand-alone
  // 'parallel' forms; combined target and loop-bound-sharing directives
  // handle their pre-inits in other scopes.
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};

/// Lexical scope for OpenMP teams construct, that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  // Pre-init statements are emitted by this scope only for non-combined
  // 'teams' forms; 'target teams' handles its pre-inits elsewhere.
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expression from loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  // Emit the loop directive's pre-init declarations (helpers created by Sema
  // for the loop bounds/iteration-space expressions).
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
        for (const auto *I : PreInits->decls())
          CGF.EmitVarDecl(cast<VarDecl>(*I));
      }
    }
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

} // namespace

// Return the size of \p Ty in bytes as an llvm::Value. For constant-size
// types this is a constant; for VLAs the size is computed at run time by
// multiplying the (runtime) element counts with the base element size.
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    // Scale the accumulated element count by the base element size.
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}

// Collect, in \p CapturedVars, the values to pass to the outlined function
// for each capture of \p S: VLA sizes and by-copy scalars as values (scalars
// squeezed through a uintptr temporary), 'this' as CXXThisValue, and
// by-reference captures as addresses.
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      // VLA sizes are passed as plain values, looked up from the map filled
      // in when the size expressions were emitted.
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV =
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), SourceLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

/// Reinterpret the storage behind \p AddrLV (typed uintptr) as holding a
/// value of \p DstType and return its address. If \p isReferenceType is set,
/// return the address of a fresh temporary holding the reference itself
/// rather than the referenced value.
static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), SourceLocation());
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references we need to return the address of the
  // reference instead of the reference of the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
  }

  return TmpAddr;
}

/// Return the canonical form of \p T while preserving its reference/pointer
/// structure (recurses through l-value references and pointers).
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType()) {
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  }
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  return C.getCanonicalParamType(T);
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  const RecordDecl *RD = S.getCapturedRecordDecl();
  assert(CD->hasBody() &&
         "missing CapturedDecl body");

  // Build the argument list.
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList Args;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType())
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &getContext().Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &getContext().Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType()) {
      ArgType =
          getCanonicalParamType(getContext(), ArgType.getNonReferenceType());
    }
    Args.push_back(ImplicitParamDecl::Create(getContext(), /*DC=*/nullptr,
                                             FD->getLocation(), II, ArgType,
                                             ImplicitParamDecl::Other));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F = llvm::Function::Create(
      FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
      CapturedStmtInfo->getHelperName(), &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // Generate the function.
  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                CD->getBody()->getLocStart());
  // Walk the captures in parallel with the record fields and the function
  // arguments, binding each captured entity to the corresponding argument.
  unsigned Cnt = CD->getContextParamPosition();
  I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      Address LocalAddr = GetAddrOfLocalVar(Args[Cnt]);
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CreateMemTemp(CurVD->getType(), getPointerAlign(),
                                        ".materialized_ref");
        EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, /*Volatile=*/false,
                          CurVD->getType());
        LocalAddr = RefAddr;
      }
      setAddrOfLocalVar(CurVD, LocalAddr);
      ++Cnt;
      ++I;
      continue;
    }

    LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
    LValue ArgLVal =
        MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
                       BaseInfo);
    if (FD->hasCapturedVLAType()) {
      // Recover the VLA size from the uintptr argument and register it so
      // that uses of the size expression inside the region can find it.
      LValue CastedArgLVal =
          MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
                                              Args[Cnt]->getName(), ArgLVal),
                         FD->getType(), BaseInfo);
      auto *ExprArg =
          EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizeMap[VAT->getSizeExpr()] = ExprArg;
    } else if (I->capturesVariable()) {
      // Capture by reference: unwrap the reference/pointer argument to reach
      // the original variable's storage.
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = EmitLoadOfReference(
              ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
        } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      setAddrOfLocalVar(
          Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
    } else if (I->capturesVariableByCopy()) {
      // Non-pointer by-copy capture: the value arrived squeezed into a
      // uintptr argument; cast its storage back to the variable's type.
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(),
                                                  Args[Cnt]->getName(), ArgLVal,
                                                  VarTy->isReferenceType()));
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue =
          EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
    }
    ++Cnt;
    ++I;
  }

  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());

  return F;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//

// Emit an element-by-element copy loop from \p SrcAddr to \p DestAddr for an
// array of type \p OriginalType, invoking \p CopyGen once per element pair.
// The generated IR is a standard while-do loop with PHI-based cursors.
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI =
      Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // PHI incomings are added after CopyGen runs, since it may have changed the
  // insert block.
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  // Pattern-match the combiner shape produced for user-defined reductions:
  // a call whose callee is an opaque value wrapping a DeclRefExpr to the
  // OMPDeclareReductionDecl.
  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

// Initialize \p Private using the initializer of the user-defined reduction
// \p DRD (remapping the init expression's LHS/RHS placeholders to
// \p Private/\p Original), or zero-initialize it when the UDR declares no
// initializer.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    auto *CE = cast<CallExpr>(InitOp);
    auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() -> Address { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() -> Address { return Original; });
    (void)PrivateScope.Privatize();
    // Emit the init expression with its callee bound to the UDR initializer
    // function (Reduction.second).
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: copy a null-constant of the type into Private.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, ".init");
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// \brief Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, const Expr *Init,
                                 Address SrcAddr = Address::invalid()) {
  // A non-null DRD means Init is a user-defined-reduction initializer, in
  // which case the source array is walked in lockstep with the destination.
  auto *DRD = getReductionInit(Init);
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // Source cursor is only materialized when a UDR needs the original values.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (DRD && (DRD->getInitializer() || !Init)) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

// Emit a copy of \p OriginalType data from \p SrcAddr to \p DestAddr using
// \p Copy, remapping the pseudo source/destination variables \p SrcVD and
// \p DestVD so the copy expression reads/writes the right storage. Simple
// array assignments become a memcpy; everything else goes element-by-element
// or through the copy expression directly.
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

// Emit the private copies for all 'firstprivate' variables of \p D into
// \p PrivateScope. Returns true if at least one firstprivate variable is
// also lastprivate (callers must then keep the originals updatable).
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  // Collect vars that are also lastprivate, to detect the overlap below.
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *CapFD = CapturesInfo.lookup(OrigVD);
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      // If the variable was already captured by value (and is not also
      // lastprivate or a reference), the capture itself is the private copy;
      // no extra allocation or initialization is needed.
      if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable
            // (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

// Emit the uninitialized private copies for all 'private' clause variables
// of \p D into \p PrivateScope. Duplicate mentions of the same variable are
// emitted only once.
void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

// Emit the copies for 'copyin' clause variables: each thread's threadprivate
// copy is assigned from the master thread's copy. Returns true if any copy
// was emitted (the caller is then expected to emit a barrier).
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable.
        // If we are emitting code with TLS support, the address is passed
        // from the master as field in the captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          // Without TLS the master copy is the global/static-local itself.
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // Compare master vs. private addresses: equality means we are on
          // the master thread and all copies can be skipped.
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

// Emit the private copies for 'lastprivate' variables of \p D and register
// the addresses of the originals (as the destination pseudo-variables) so
// the final copy-back can find them. Returns true if the directive has at
// least one lastprivate clause.
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  // SIMD loop-control variables already get private copies as loop counters;
  // collect them so they are not privatized a second time here.
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization, it is done in
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type
= PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        // For reference-typed privates, copy from the referenced storage.
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Casts \a Addr back to the type of the reduction base lvalue, materializing
/// a chain of pointer temporaries when the base type is a (nested) pointer or
/// reference type that differs from the element type \a ElTy.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  // Peel pointer/reference levels, creating a temporary per level; each
  // temporary stores the pointer to the next inner one.
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLV.getPointer()->getType();
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLV.getAlignment());
}

/// Loads through the pointer/reference chain of \a BaseLV until an lvalue
/// whose type matches \a ElTy is reached; the result is cast to ElTy's
/// in-memory pointer type.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (auto *PtrTy = BaseTy->getAs<PointerType>())
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    else {
      BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
                                             BaseTy->castAs<ReferenceType>());
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      Address(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
          BaseLV.getAlignment()),
      BaseLV.getType(), BaseLV.getBaseInfo());
}

/// Registers private copies for 'reduction' clause variables and emits their
/// reduction-specific initialization. Handles three variable forms: OpenMP
/// array sections, array subscripts, and plain variables (array or scalar).
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // The four sequences advance in lock-step with the varlist.
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    for (auto IRef : C->varlists()) {
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
      // DRD is non-null for user-defined ('declare reduction') reductions.
      auto *DRD = getReductionInit(*IRed);
      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
        // Strip nested sections/subscripts down to the base DeclRefExpr.
        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
          Base = TempOASE->getBase()->IgnoreParenImpCasts();
        while
(auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        // Lower and upper bound lvalues of the array section.
        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
        auto OASELValueUB =
            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue =
            loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
                        OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [OASELValueLB]() -> Address {
          return OASELValueLB.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
                     OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit VarDecl with copy init for arrays.
              // Get the address of the original variable captured in current
              // captured region.
              // Section length = UB - LB + 1 elements; binds the VLA size.
              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
                                                 OASELValueLB.getPointer());
              Size = Builder.CreateNUWAdd(
                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(Size));
              EmitVariablyModifiedType(PrivateVD->getType());
              auto Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              auto *Init = PrivateVD->getInit();
              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                   DRD ? *IRed : Init,
                                   OASELValueLB.getAddress());
              EmitAutoVarCleanups(Emission);
              // Emit private VarDecl with reduction init.
              // Rebase: offset of LB within the original base, applied to the
              // private copy, then cast back to the base expression's type.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   OASELValueLB.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(),
                                OASELValueLB.getType(), OriginalBaseLValue,
                                Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
        // Strip nested subscripts down to the base DeclRefExpr.
        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto ASELValue = EmitLValue(ASE);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue = loadToBegin(
            *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(
            LHSVD, [ASELValue]() -> Address { return ASELValue.getAddress(); });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
                     OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit private VarDecl with reduction init.
              AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                // User-defined reduction: run the declared initializer.
                emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                 ASELValue.getAddress(),
                                                 ASELValue.getType());
              } else
                EmitAutoVarInit(Emission);
              EmitAutoVarCleanups(Emission);
              // Rebase the private element onto the original base expression.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   ASELValue.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
                                OriginalBaseLValue, Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
          return Builder.CreateElementBitCast(
              GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
              "rhs.begin");
        });
      } else {
        // Plain variable (whole array or scalar).
        auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        if (getContext().getAsArrayType(Type)) {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          IRef->getType(), VK_LValue, IRef->getExprLoc());
          Address OriginalAddr = EmitLValue(&DRE).getAddress();
          PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
                                          LHSVD]() -> Address {
            OriginalAddr = Builder.CreateElementBitCast(
                OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
            return OriginalAddr;
          });
          // Emit reduction copy for the whole array.
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            if (Type->isVariablyModifiedType()) {
              // Bind the VLA size to the original variable's size so the
              // private copy is allocated with the same number of elements.
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(
                      getTypeSize(OrigVD->getType().getNonReferenceType())));
              EmitVariablyModifiedType(Type);
            }
            auto Emission = EmitAutoVarAlloca(*PrivateVD);
            auto Addr = Emission.getAllocatedAddress();
            auto *Init = PrivateVD->getInit();
            EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                 DRD ? *IRed : Init, OriginalAddr);
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
            return Builder.CreateElementBitCast(
                GetAddrOfLocalVar(PrivateVD),
                ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
          });
        } else {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          Address OriginalAddr = Address::invalid();
          PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
                                          &OriginalAddr]() -> Address {
            DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                            CapturedStmtInfo->lookup(OrigVD) != nullptr,
                            IRef->getType(), VK_LValue, IRef->getExprLoc());
            OriginalAddr = EmitLValue(&DRE).getAddress();
            return OriginalAddr;
          });
          // Emit reduction copy.
          bool IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
                // Emit private VarDecl with reduction init.
                AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
                auto Addr = Emission.getAllocatedAddress();
                if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                  // User-defined reduction: run the declared initializer.
                  emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                   OriginalAddr,
                                                   PrivateVD->getType());
                } else
                  EmitAutoVarInit(Emission);
                EmitAutoVarCleanups(Emission);
                return Addr;
              });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
            return GetAddrOfLocalVar(PrivateVD);
          });
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
      ++IRed;
    }
  }
}

/// Emits the final reduction step: gathers the privates/LHS/RHS/op expressions
/// from all reduction clauses and calls into the OpenMP runtime to combine the
/// private copies into the original variables.
void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      D.getDirectiveKind() == OMPD_simd;
    bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has implicit barrier).
CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

/// Emits post-update expressions of 'reduction' clauses. The conditional
/// block produced by \a CondGen (if any) is emitted lazily before the first
/// post-update expression.
static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (auto *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for'
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace

/// Emits code shared by all parallel-based directives: outlines the parallel
/// region, emits 'num_threads'/'proc_bind' clauses, finds the applicable
/// 'if' clause condition and emits the runtime parallel call.
static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  // Use the first 'if' clause that applies to 'parallel' (unmodified or with
  // the 'parallel' name modifier).
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

/// Bound-parameter emitter for directives that do not share distribute loop
/// bounds: appends nothing.
static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // propagation master's thread values of threadprivate variables to local
      // instances of that variables of all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

/// Emits the body of an OpenMP loop directive: counter/linear updates, the
/// user's body statement, and the 'continue' target block.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *U : C->updates())
      EmitIgnoredExpr(U);
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

/// Emits the inner-loop skeleton: condition block, body (via \a BodyGen),
/// increment (IncExpr plus \a PostIncGen) and back-edge. A separate cleanup
/// staging block is emitted when \a RequiresCleanup is set.
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

/// Emits initialization of 'linear' clause variables and the pre-calculated
/// linear steps.
void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return;
  // Emit inits for the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        // The init refers to the original variable: emit the copy through a
        // DeclRefExpr that respects capturing.
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
}

/// Emits the final values of 'linear' clause variables after the loop,
/// optionally inside a conditional block produced by \a CondGen.
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      // Privatize the original variable to its own address so the final
      // expression writes straight to it.
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emits alignment assumptions for pointers listed in 'aligned' clauses.
static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for
(auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined default
        // alignments for SIMD instructions on the target platforms are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

/// Privatizes the loop counters of \a S: registers the private counter copies
/// in \a LoopScope and, for captured/global counters, re-registers the private
/// variables under the original counters' addresses.
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    (void)LoopScope.addPrivate(VD, [&]() -> Address {
      // Emit var without initialization.
      if (!LocalDeclMap.count(PrivateVD)) {
        auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
        EmitAutoVarCleanups(VarEmission);
      }
      DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      (*I)->getType(), VK_LValue, (*I)->getExprLoc());
      return EmitLValue(&DRE).getAddress();
    });
    // Map the private counter to the original variable's address when the
    // original is a local, captured, or global variable.
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    }
    ++I;
  }
}

/// Emits the loop precondition check for \a S: privatizes the counters in a
/// temporary scope, emits their initial values, then branches on \a Cond.
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (auto I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Check that loop is executed at least one time.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

/// Emits private copies for 'linear' clause variables. Simd loop control
/// variables are skipped (they are privatized as loop counters); only their
/// private copies are emitted.
void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
(void)IsRegistered;
      } else
        EmitVarDecl(*PrivateVD);
      ++CurPrivate;
    }
  }
}

/// Emits loop metadata for 'simdlen'/'safelen' clauses: sets the vectorize
/// width and controls whether memory accesses may be marked parallel.
static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(false);
  }
}

/// Sets up loop metadata for a simd loop: marks it parallel (unless
/// monotonic), enables vectorization and applies simdlen/safelen clauses.
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}

/// Emits the final values of the simd loop counters after the loop, for
/// counters that are visible outside the loop (local, captured, global or
/// captured-expression declarations); optionally guarded by \a CondGen.
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED)
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      else {
        DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emits the loop body followed by a debug stop point.
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
// emitOMPLoopBodyWithStopPoint (continued): emit the loop body, then a debug
// stop point at the directive's location.
CGF.EmitOMPLoopBody(S, LoopExit); 1679 CGF.EmitStopPoint(&S); 1680 } 1681 1682 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 1683 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1684 OMPLoopScope PreInitScope(CGF, S); 1685 // if (PreCond) { 1686 // for (IV in 0..LastIteration) BODY; 1687 // <Final counter/linear vars updates>; 1688 // } 1689 // 1690 1691 // Emit: if (PreCond) - begin. 1692 // If the condition constant folds and can be elided, avoid emitting the 1693 // whole loop. 1694 bool CondConstant; 1695 llvm::BasicBlock *ContBlock = nullptr; 1696 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1697 if (!CondConstant) 1698 return; 1699 } else { 1700 auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); 1701 ContBlock = CGF.createBasicBlock("simd.if.end"); 1702 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 1703 CGF.getProfileCount(&S)); 1704 CGF.EmitBlock(ThenBlock); 1705 CGF.incrementProfileCounter(&S); 1706 } 1707 1708 // Emit the loop iteration variable. 1709 const Expr *IVExpr = S.getIterationVariable(); 1710 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 1711 CGF.EmitVarDecl(*IVDecl); 1712 CGF.EmitIgnoredExpr(S.getInit()); 1713 1714 // Emit the iterations count variable. 1715 // If it is not a variable, Sema decided to calculate iterations count on 1716 // each iteration (e.g., it is foldable into a constant). 1717 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1718 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1719 // Emit calculation of the iterations count.
// EmitOMPSimdDirective lambda (continued): install simd loop metadata, then
// privatize loop counters and linear/private/reduction variables before
// emitting the inner loop; lastprivate/reduction finals follow the loop.
1720 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 1721 } 1722 1723 CGF.EmitOMPSimdInit(S); 1724 1725 emitAlignedClause(CGF, S); 1726 CGF.EmitOMPLinearClauseInit(S); 1727 { 1728 OMPPrivateScope LoopScope(CGF); 1729 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 1730 CGF.EmitOMPLinearClause(S, LoopScope); 1731 CGF.EmitOMPPrivateClause(S, LoopScope); 1732 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1733 bool HasLastprivateClause = 1734 CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1735 (void)LoopScope.Privatize(); 1736 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1737 S.getInc(), 1738 [&S](CodeGenFunction &CGF) { 1739 CGF.EmitOMPLoopBody(S, JumpDest()); 1740 CGF.EmitStopPoint(&S); 1741 }, 1742 [](CodeGenFunction &) {}); 1743 CGF.EmitOMPSimdFinal( 1744 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1745 // Emit final copy of the lastprivate variables at the end of loops. 1746 if (HasLastprivateClause) 1747 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 1748 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); 1749 emitPostUpdateForReductionClause( 1750 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1751 } 1752 CGF.EmitOMPLinearClauseFinal( 1753 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1754 // Emit: if (PreCond) - end.
// Close the "if (PreCond)" guard opened above, if one was emitted.  After the
// lambda, EmitOMPOuterLoop begins: the shared outer-loop emitter used by both
// worksharing ('for') and 'distribute' codegen; per iteration of the outer
// dispatch loop it requests/computes a chunk [LB..UB] and runs CodeGenLoop.
1755 if (ContBlock) { 1756 CGF.EmitBranch(ContBlock); 1757 CGF.EmitBlock(ContBlock, true); 1758 } 1759 }; 1760 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1761 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1762 } 1763 1764 void CodeGenFunction::EmitOMPOuterLoop( 1765 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 1766 CodeGenFunction::OMPPrivateScope &LoopScope, 1767 const CodeGenFunction::OMPLoopArguments &LoopArgs, 1768 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 1769 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { 1770 auto &RT = CGM.getOpenMPRuntime(); 1771 1772 const Expr *IVExpr = S.getIterationVariable(); 1773 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1774 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1775 1776 auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 1777 1778 // Start the loop with a block that tests the condition. 1779 auto CondBlock = createBasicBlock("omp.dispatch.cond"); 1780 EmitBlock(CondBlock); 1781 const SourceRange &R = S.getSourceRange(); 1782 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1783 SourceLocToDebugLoc(R.getEnd())); 1784 1785 llvm::Value *BoolCondVal = nullptr; 1786 if (!DynamicOrOrdered) { 1787 // UB = min(UB, GlobalUB) or 1788 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 1789 // 'distribute parallel for') 1790 EmitIgnoredExpr(LoopArgs.EUB); 1791 // IV = LB 1792 EmitIgnoredExpr(LoopArgs.Init); 1793 // IV < UB 1794 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); 1795 } else { 1796 BoolCondVal = 1797 RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL, 1798 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); 1799 } 1800 1801 // If there are any cleanups between here and the loop-exit scope, 1802 // create a block to stage a loop exit along.
// EmitOMPOuterLoop (continued): branch into the dispatch body (staging a
// cleanup block if the scope requires it), emit the inner loop, then the
// increment block that re-enters the dispatch condition.
1803 auto ExitBlock = LoopExit.getBlock(); 1804 if (LoopScope.requiresCleanups()) 1805 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 1806 1807 auto LoopBody = createBasicBlock("omp.dispatch.body"); 1808 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 1809 if (ExitBlock != LoopExit.getBlock()) { 1810 EmitBlock(ExitBlock); 1811 EmitBranchThroughCleanup(LoopExit); 1812 } 1813 EmitBlock(LoopBody); 1814 1815 // Emit "IV = LB" (in case of static schedule, we have already calculated new 1816 // LB for loop condition and emitted it above). 1817 if (DynamicOrOrdered) 1818 EmitIgnoredExpr(LoopArgs.Init); 1819 1820 // Create a block for the increment. 1821 auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 1822 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1823 1824 // Generate !llvm.loop.parallel metadata for loads and stores for loops 1825 // with dynamic/guided scheduling and without ordered clause. 1826 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 1827 LoopStack.setParallel(!IsMonotonic); 1828 else 1829 EmitOMPSimdInit(S, IsMonotonic); 1830 1831 SourceLocation Loc = S.getLocStart(); 1832 1833 // when 'distribute' is not combined with a 'for': 1834 // while (idx <= UB) { BODY; ++idx; } 1835 // when 'distribute' is combined with a 'for' 1836 // (e.g. 'distribute parallel for') 1837 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 1838 EmitOMPInnerLoop( 1839 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, 1840 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 1841 CodeGenLoop(CGF, S, LoopExit); 1842 }, 1843 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { 1844 CodeGenOrdered(CGF, Loc, IVSize, IVSigned); 1845 }); 1846 1847 EmitBlock(Continue.getBlock()); 1848 BreakContinueStack.pop_back(); 1849 if (!DynamicOrOrdered) { 1850 // Emit "LB = LB + Stride", "UB = UB + Stride".
// Static schedules advance LB/UB by the stride before re-testing the
// dispatch condition; dynamic/ordered schedules rely on emitForNext above.
// EmitOMPForOuterLoop then begins: the 'for'-specific wrapper that decides
// dynamic vs. static chunked dispatch and forwards to EmitOMPOuterLoop.
1851 EmitIgnoredExpr(LoopArgs.NextLB); 1852 EmitIgnoredExpr(LoopArgs.NextUB); 1853 } 1854 1855 EmitBranch(CondBlock); 1856 LoopStack.pop(); 1857 // Emit the fall-through block. 1858 EmitBlock(LoopExit.getBlock()); 1859 1860 // Tell the runtime we are done. 1861 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { 1862 if (!DynamicOrOrdered) 1863 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 1864 }; 1865 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 1866 } 1867 1868 void CodeGenFunction::EmitOMPForOuterLoop( 1869 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 1870 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1871 const OMPLoopArguments &LoopArgs, 1872 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 1873 auto &RT = CGM.getOpenMPRuntime(); 1874 1875 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 1876 const bool DynamicOrOrdered = 1877 Ordered || RT.isDynamic(ScheduleKind.Schedule); 1878 1879 assert((Ordered || 1880 !RT.isStaticNonchunked(ScheduleKind.Schedule, 1881 LoopArgs.Chunk != nullptr)) && 1882 "static non-chunked schedule does not need outer loop"); 1883 1884 // Emit outer loop. 1885 // 1886 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1887 // When schedule(dynamic,chunk_size) is specified, the iterations are 1888 // distributed to threads in the team in chunks as the threads request them. 1889 // Each thread executes a chunk of iterations, then requests another chunk, 1890 // until no chunks remain to be distributed. Each chunk contains chunk_size 1891 // iterations, except for the last chunk to be distributed, which may have 1892 // fewer iterations. When no chunk_size is specified, it defaults to 1. 1893 // 1894 // When schedule(guided,chunk_size) is specified, the iterations are assigned 1895 // to threads in the team in chunks as the executing threads request them.
// (OpenMP 4.5 spec excerpts describing guided/auto/runtime schedules
// continue below.)
1896 // Each thread executes a chunk of iterations, then requests another chunk, 1897 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 1898 // each chunk is proportional to the number of unassigned iterations divided 1899 // by the number of threads in the team, decreasing to 1. For a chunk_size 1900 // with value k (greater than 1), the size of each chunk is determined in the 1901 // same way, with the restriction that the chunks do not contain fewer than k 1902 // iterations (except for the last chunk to be assigned, which may have fewer 1903 // than k iterations). 1904 // 1905 // When schedule(auto) is specified, the decision regarding scheduling is 1906 // delegated to the compiler and/or runtime system. The programmer gives the 1907 // implementation the freedom to choose any possible mapping of iterations to 1908 // threads in the team. 1909 // 1910 // When schedule(runtime) is specified, the decision regarding scheduling is 1911 // deferred until run time, and the schedule and chunk size are taken from the 1912 // run-sched-var ICV. If the ICV is set to auto, the schedule is 1913 // implementation defined 1914 // 1915 // while(__kmpc_dispatch_next(&LB, &UB)) { 1916 // idx = LB; 1917 // while (idx <= UB) { BODY; ++idx; 1918 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. 1919 // } // inner loop 1920 // } 1921 // 1922 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1923 // When schedule(static, chunk_size) is specified, iterations are divided into 1924 // chunks of size chunk_size, and the chunks are assigned to the threads in 1925 // the team in a round-robin fashion in the order of the thread number.
// Emit the runtime init call (dispatch_init for dynamic/ordered via
// CGDispatchBounds-provided bounds, static_init otherwise), then drive the
// shared outer-loop emitter with the directive's own Init/Cond/Inc/NextLB/
// NextUB expressions.  EmitOMPDistributeOuterLoop begins at the end of this
// span.
1926 // 1927 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { 1928 // while (idx <= UB) { BODY; ++idx; } // inner loop 1929 // LB = LB + ST; 1930 // UB = UB + ST; 1931 // } 1932 // 1933 1934 const Expr *IVExpr = S.getIterationVariable(); 1935 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1936 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1937 1938 if (DynamicOrOrdered) { 1939 auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); 1940 llvm::Value *LBVal = DispatchBounds.first; 1941 llvm::Value *UBVal = DispatchBounds.second; 1942 CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal, 1943 LoopArgs.Chunk}; 1944 RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize, 1945 IVSigned, Ordered, DipatchRTInputValues); 1946 } else { 1947 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, 1948 Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, 1949 LoopArgs.ST, LoopArgs.Chunk); 1950 } 1951 1952 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, 1953 const unsigned IVSize, 1954 const bool IVSigned) { 1955 if (Ordered) { 1956 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, 1957 IVSigned); 1958 } 1959 }; 1960 1961 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, 1962 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); 1963 OuterLoopArgs.IncExpr = S.getInc(); 1964 OuterLoopArgs.Init = S.getInit(); 1965 OuterLoopArgs.Cond = S.getCond(); 1966 OuterLoopArgs.NextLB = S.getNextLowerBound(); 1967 OuterLoopArgs.NextUB = S.getNextUpperBound(); 1968 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs, 1969 emitOMPLoopBodyWithStopPoint, CodeGenOrdered); 1970 } 1971 1972 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, 1973 const unsigned IVSize, const bool IVSigned) {} 1974 1975 void CodeGenFunction::EmitOMPDistributeOuterLoop( 1976
// EmitOMPDistributeOuterLoop (continued): distribute schedules are always
// static (no dynamic dispatch); when combined with 'for' (loop-bound-sharing
// directives) the Combined* bound expressions are selected instead of the
// plain ones.
OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, 1977 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, 1978 const CodeGenLoopTy &CodeGenLoopContent) { 1979 1980 auto &RT = CGM.getOpenMPRuntime(); 1981 1982 // Emit outer loop. 1983 // Same behavior as a OMPForOuterLoop, except that schedule cannot be 1984 // dynamic 1985 // 1986 1987 const Expr *IVExpr = S.getIterationVariable(); 1988 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1989 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1990 1991 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, 1992 IVSigned, /* Ordered = */ false, LoopArgs.IL, 1993 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, 1994 LoopArgs.Chunk); 1995 1996 // for combined 'distribute' and 'for' the increment expression of distribute 1997 // is store in DistInc. For 'distribute' alone, it is in Inc. 1998 Expr *IncExpr; 1999 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) 2000 IncExpr = S.getDistInc(); 2001 else 2002 IncExpr = S.getInc(); 2003 2004 // this routine is shared by 'omp distribute parallel for' and 2005 // 'omp distribute': select the right EUB expression depending on the 2006 // directive 2007 OMPLoopArguments OuterLoopArgs; 2008 OuterLoopArgs.LB = LoopArgs.LB; 2009 OuterLoopArgs.UB = LoopArgs.UB; 2010 OuterLoopArgs.ST = LoopArgs.ST; 2011 OuterLoopArgs.IL = LoopArgs.IL; 2012 OuterLoopArgs.Chunk = LoopArgs.Chunk; 2013 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2014 ? S.getCombinedEnsureUpperBound() 2015 : S.getEnsureUpperBound(); 2016 OuterLoopArgs.IncExpr = IncExpr; 2017 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2018 ? S.getCombinedInit() 2019 : S.getInit(); 2020 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2021 ?
// (second half of the combined-vs-plain selections for Cond/NextLB/NextUB;
// then the shared outer-loop emitter runs with static scheduling)
S.getCombinedCond() 2022 : S.getCond(); 2023 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2024 ? S.getCombinedNextLowerBound() 2025 : S.getNextLowerBound(); 2026 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2027 ? S.getCombinedNextUpperBound() 2028 : S.getNextUpperBound(); 2029 2030 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, 2031 LoopScope, OuterLoopArgs, CodeGenLoopContent, 2032 emitEmptyOrdered); 2033 } 2034 2035 /// Emit a helper variable and return corresponding lvalue. 2036 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 2037 const DeclRefExpr *Helper) { 2038 auto VDecl = cast<VarDecl>(Helper->getDecl()); 2039 CGF.EmitVarDecl(*VDecl); 2040 return CGF.EmitLValue(Helper); 2041 } 2042 2043 static std::pair<LValue, LValue> 2044 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, 2045 const OMPExecutableDirective &S) { 2046 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2047 LValue LB = 2048 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2049 LValue UB = 2050 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2051 2052 // When composing 'distribute' with 'for' (e.g. as in 'distribute 2053 // parallel for') we need to use the 'distribute' 2054 // chunk lower and upper bounds rather than the whole loop iteration 2055 // space. These are parameters to the outlined function for 'parallel' 2056 // and we copy the bounds of the previous schedule into the 2057 // the current ones.
// Copy the enclosing distribute chunk's bounds (PrevLB/PrevUB) into this
// inner 'for' loop's LB/UB, converting to the iteration variable's type.
2058 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable()); 2059 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable()); 2060 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation()); 2061 PrevLBVal = CGF.EmitScalarConversion( 2062 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(), 2063 LS.getIterationVariable()->getType(), SourceLocation()); 2064 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation()); 2065 PrevUBVal = CGF.EmitScalarConversion( 2066 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(), 2067 LS.getIterationVariable()->getType(), SourceLocation()); 2068 2069 CGF.EmitStoreOfScalar(PrevLBVal, LB); 2070 CGF.EmitStoreOfScalar(PrevUBVal, UB); 2071 2072 return {LB, UB}; 2073 } 2074 2075 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then 2076 /// we need to use the LB and UB expressions generated by the worksharing 2077 /// code generation support, whereas in non combined situations we would 2078 /// just emit 0 and the LastIteration expression 2079 /// This function is necessary due to the difference of the LB and UB 2080 /// types for the RT emission routines for 'for_static_init' and 2081 /// 'for_dispatch_init' 2082 static std::pair<llvm::Value *, llvm::Value *> 2083 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, 2084 const OMPExecutableDirective &S, 2085 Address LB, Address UB) { 2086 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2087 const Expr *IVExpr = LS.getIterationVariable(); 2088 // when implementing a dynamic schedule for a 'for' combined with a 2089 // 'distribute' (e.g.
// (comment continues: the combined 'for' loop is not normalized, each team
// runs only its assigned distribute chunk, so load LB/UB as dispatch bounds)
'distribute parallel for'), the 'for' loop 2090 // is not normalized as each team only executes its own assigned 2091 // distribute chunk 2092 QualType IteratorTy = IVExpr->getType(); 2093 llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, 2094 SourceLocation()); 2095 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, 2096 SourceLocation()); 2097 return {LBVal, UBVal}; 2098 } 2099 2100 static void emitDistributeParallelForDistributeInnerBoundParams( 2101 CodeGenFunction &CGF, const OMPExecutableDirective &S, 2102 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { 2103 const auto &Dir = cast<OMPLoopDirective>(S); 2104 LValue LB = 2105 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); 2106 auto LBCast = CGF.Builder.CreateIntCast( 2107 CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false); 2108 CapturedVars.push_back(LBCast); 2109 LValue UB = 2110 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); 2111 2112 auto UBCast = CGF.Builder.CreateIntCast( 2113 CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false); 2114 CapturedVars.push_back(UBCast); 2115 } 2116 2117 static void 2118 emitInnerParallelForWhenCombined(CodeGenFunction &CGF, 2119 const OMPLoopDirective &S, 2120 CodeGenFunction::JumpDest LoopExit) { 2121 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, 2122 PrePostActionTy &) { 2123 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), 2124 emitDistributeParallelForInnerBounds, 2125 emitDistributeParallelForDispatchBounds); 2126 }; 2127 2128 emitCommonOMPParallelDirective( 2129 CGF, S, OMPD_for, CGInlinedWorksharingLoop, 2130 emitDistributeParallelForDistributeInnerBoundParams); 2131 } 2132 2133 void CodeGenFunction::EmitOMPDistributeParallelForDirective( 2134 const OMPDistributeParallelForDirective &S) { 2135 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2136 CGF.EmitOMPDistributeLoop(S,
// EmitOMPDistributeParallelForDirective (continued), followed by the
// not-yet-specialized combined variants (distribute parallel for simd,
// distribute simd, target parallel for simd, target simd), which are each
// emitted by simply running the captured statement under an inlined
// directive scope.
emitInnerParallelForWhenCombined, 2137 S.getDistInc()); 2138 }; 2139 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2140 OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for, 2141 /*HasCancel=*/false); 2142 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 2143 /*HasCancel=*/false); 2144 } 2145 2146 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( 2147 const OMPDistributeParallelForSimdDirective &S) { 2148 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2149 CGM.getOpenMPRuntime().emitInlinedDirective( 2150 *this, OMPD_distribute_parallel_for_simd, 2151 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2152 OMPLoopScope PreInitScope(CGF, S); 2153 CGF.EmitStmt( 2154 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2155 }); 2156 } 2157 2158 void CodeGenFunction::EmitOMPDistributeSimdDirective( 2159 const OMPDistributeSimdDirective &S) { 2160 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2161 CGM.getOpenMPRuntime().emitInlinedDirective( 2162 *this, OMPD_distribute_simd, 2163 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2164 OMPLoopScope PreInitScope(CGF, S); 2165 CGF.EmitStmt( 2166 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2167 }); 2168 } 2169 2170 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( 2171 const OMPTargetParallelForSimdDirective &S) { 2172 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2173 CGM.getOpenMPRuntime().emitInlinedDirective( 2174 *this, OMPD_target_parallel_for_simd, 2175 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2176 OMPLoopScope PreInitScope(CGF, S); 2177 CGF.EmitStmt( 2178 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2179 }); 2180 } 2181 2182 void CodeGenFunction::EmitOMPTargetSimdDirective( 2183 const OMPTargetSimdDirective &S) { 2184 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2185 CGM.getOpenMPRuntime().emitInlinedDirective( 2186 *this, OMPD_target_simd, [&S](CodeGenFunction &CGF,
// More combined-directive emitters: teams distribute [simd | parallel for
// [simd]] — same pattern, captured statement under an inlined scope.
PrePostActionTy &) { 2187 OMPLoopScope PreInitScope(CGF, S); 2188 CGF.EmitStmt( 2189 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2190 }); 2191 } 2192 2193 void CodeGenFunction::EmitOMPTeamsDistributeDirective( 2194 const OMPTeamsDistributeDirective &S) { 2195 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2196 CGM.getOpenMPRuntime().emitInlinedDirective( 2197 *this, OMPD_teams_distribute, 2198 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2199 OMPLoopScope PreInitScope(CGF, S); 2200 CGF.EmitStmt( 2201 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2202 }); 2203 } 2204 2205 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( 2206 const OMPTeamsDistributeSimdDirective &S) { 2207 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2208 CGM.getOpenMPRuntime().emitInlinedDirective( 2209 *this, OMPD_teams_distribute_simd, 2210 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2211 OMPLoopScope PreInitScope(CGF, S); 2212 CGF.EmitStmt( 2213 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2214 }); 2215 } 2216 2217 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( 2218 const OMPTeamsDistributeParallelForSimdDirective &S) { 2219 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2220 CGM.getOpenMPRuntime().emitInlinedDirective( 2221 *this, OMPD_teams_distribute_parallel_for_simd, 2222 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2223 OMPLoopScope PreInitScope(CGF, S); 2224 CGF.EmitStmt( 2225 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2226 }); 2227 } 2228 2229 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( 2230 const OMPTeamsDistributeParallelForDirective &S) { 2231 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2232 CGM.getOpenMPRuntime().emitInlinedDirective( 2233 *this, OMPD_teams_distribute_parallel_for, 2234 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2235 OMPLoopScope PreInitScope(CGF, S); 2236 CGF.EmitStmt( 2237
// target teams distribute variants follow; note these emit no OMPLexicalScope
// and no OMPLoopScope, unlike the teams variants above.
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2238 }); 2239 } 2240 2241 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( 2242 const OMPTargetTeamsDistributeDirective &S) { 2243 CGM.getOpenMPRuntime().emitInlinedDirective( 2244 *this, OMPD_target_teams_distribute, 2245 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2246 CGF.EmitStmt( 2247 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2248 }); 2249 } 2250 2251 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( 2252 const OMPTargetTeamsDistributeParallelForDirective &S) { 2253 CGM.getOpenMPRuntime().emitInlinedDirective( 2254 *this, OMPD_target_teams_distribute_parallel_for, 2255 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2256 CGF.EmitStmt( 2257 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2258 }); 2259 } 2260 2261 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( 2262 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 2263 CGM.getOpenMPRuntime().emitInlinedDirective( 2264 *this, OMPD_target_teams_distribute_parallel_for_simd, 2265 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2266 CGF.EmitStmt( 2267 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2268 }); 2269 } 2270 2271 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( 2272 const OMPTargetTeamsDistributeSimdDirective &S) { 2273 CGM.getOpenMPRuntime().emitInlinedDirective( 2274 *this, OMPD_target_teams_distribute_simd, 2275 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2276 CGF.EmitStmt( 2277 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2278 }); 2279 } 2280 2281 namespace { 2282 struct ScheduleKindModifiersTy { 2283 OpenMPScheduleClauseKind Kind; 2284 OpenMPScheduleClauseModifier M1; 2285 OpenMPScheduleClauseModifier M2; 2286 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, 2287 OpenMPScheduleClauseModifier M1, 2288 OpenMPScheduleClauseModifier M2) 2289 : Kind(Kind), M1(M1), M2(M2) {}
// ScheduleKindModifiersTy bundles a schedule kind with its two modifiers.
// EmitOMPWorksharingLoop then begins: the shared emitter for 'for'-family
// loops; returns whether a lastprivate clause was seen so callers can decide
// on the implicit barrier.
2290 }; 2291 } // namespace 2292 2293 bool CodeGenFunction::EmitOMPWorksharingLoop( 2294 const OMPLoopDirective &S, Expr *EUB, 2295 const CodeGenLoopBoundsTy &CodeGenLoopBounds, 2296 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 2297 // Emit the loop iteration variable. 2298 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2299 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2300 EmitVarDecl(*IVDecl); 2301 2302 // Emit the iterations count variable. 2303 // If it is not a variable, Sema decided to calculate iterations count on each 2304 // iteration (e.g., it is foldable into a constant). 2305 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2306 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2307 // Emit calculation of the iterations count. 2308 EmitIgnoredExpr(S.getCalcLastIteration()); 2309 } 2310 2311 auto &RT = CGM.getOpenMPRuntime(); 2312 2313 bool HasLastprivateClause; 2314 // Check pre-condition. 2315 { 2316 OMPLoopScope PreInitScope(*this, S); 2317 // Skip the entire loop if we don't meet the precondition. 2318 // If the condition constant folds and can be elided, avoid emitting the 2319 // whole loop.
// EmitOMPWorksharingLoop (continued): precondition guard, doacross/ordered
// detection, helper-variable setup (LB/UB via CodeGenLoopBounds, ST/IL here),
// then clause privatization before choosing the schedule strategy.
2320 bool CondConstant; 2321 llvm::BasicBlock *ContBlock = nullptr; 2322 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2323 if (!CondConstant) 2324 return false; 2325 } else { 2326 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2327 ContBlock = createBasicBlock("omp.precond.end"); 2328 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2329 getProfileCount(&S)); 2330 EmitBlock(ThenBlock); 2331 incrementProfileCounter(&S); 2332 } 2333 2334 bool Ordered = false; 2335 if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 2336 if (OrderedClause->getNumForLoops()) 2337 RT.emitDoacrossInit(*this, S); 2338 else 2339 Ordered = true; 2340 } 2341 2342 llvm::DenseSet<const Expr *> EmittedFinals; 2343 emitAlignedClause(*this, S); 2344 EmitOMPLinearClauseInit(S); 2345 // Emit helper vars inits. 2346 2347 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); 2348 LValue LB = Bounds.first; 2349 LValue UB = Bounds.second; 2350 LValue ST = 2351 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2352 LValue IL = 2353 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2354 2355 // Emit 'then' code. 2356 { 2357 OMPPrivateScope LoopScope(*this); 2358 if (EmitOMPFirstprivateClause(S, LoopScope)) { 2359 // Emit implicit barrier to synchronize threads and avoid data races on 2360 // initialization of firstprivate variables and post-update of 2361 // lastprivate variables. 2362 CGM.getOpenMPRuntime().emitBarrierCall( 2363 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2364 /*ForceSimpleCall=*/true); 2365 } 2366 EmitOMPPrivateClause(S, LoopScope); 2367 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2368 EmitOMPReductionClauseInit(S, LoopScope); 2369 EmitOMPPrivateLoopCounters(S, LoopScope); 2370 EmitOMPLinearClause(S, LoopScope); 2371 (void)LoopScope.Privatize(); 2372 2373 // Detect the loop schedule kind and chunk.
// Read schedule kind, modifiers and chunk size from the 'schedule' clause,
// converting the chunk value to the iteration-variable type.
2374 llvm::Value *Chunk = nullptr; 2375 OpenMPScheduleTy ScheduleKind; 2376 if (auto *C = S.getSingleClause<OMPScheduleClause>()) { 2377 ScheduleKind.Schedule = C->getScheduleKind(); 2378 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2379 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2380 if (const auto *Ch = C->getChunkSize()) { 2381 Chunk = EmitScalarExpr(Ch); 2382 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2383 S.getIterationVariable()->getType(), 2384 S.getLocStart()); 2385 } 2386 } 2387 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2388 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2389 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2390 // If the static schedule kind is specified or if the ordered clause is 2391 // specified, and if no monotonic modifier is specified, the effect will 2392 // be as if the monotonic modifier was specified. 2393 if (RT.isStaticNonchunked(ScheduleKind.Schedule, 2394 /* Chunked */ Chunk != nullptr) && 2395 !Ordered) { 2396 if (isOpenMPSimdDirective(S.getDirectiveKind())) 2397 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2398 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2399 // When no chunk_size is specified, the iteration space is divided into 2400 // chunks that are approximately equal in size, and at most one chunk is 2401 // distributed to each thread. Note that the size of the chunks is 2402 // unspecified in this case.
// Static non-chunked, non-ordered schedule: one chunk per thread, no outer
// dispatch loop — emit static init, clamp UB, run the inner loop, then
// static finish; otherwise fall through to the monotonic check and the
// outer-loop path.
2403 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, 2404 IVSize, IVSigned, Ordered, 2405 IL.getAddress(), LB.getAddress(), 2406 UB.getAddress(), ST.getAddress()); 2407 auto LoopExit = 2408 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2409 // UB = min(UB, GlobalUB); 2410 EmitIgnoredExpr(S.getEnsureUpperBound()); 2411 // IV = LB; 2412 EmitIgnoredExpr(S.getInit()); 2413 // while (idx <= UB) { BODY; ++idx; } 2414 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 2415 S.getInc(), 2416 [&S, LoopExit](CodeGenFunction &CGF) { 2417 CGF.EmitOMPLoopBody(S, LoopExit); 2418 CGF.EmitStopPoint(&S); 2419 }, 2420 [](CodeGenFunction &) {}); 2421 EmitBlock(LoopExit.getBlock()); 2422 // Tell the runtime we are done. 2423 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2424 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 2425 }; 2426 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2427 } else { 2428 const bool IsMonotonic = 2429 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || 2430 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || 2431 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 2432 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 2433 // Emit the outer loop, which requests its work chunk [LB..UB] from 2434 // runtime and runs the inner loop to process it. 2435 const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), 2436 ST.getAddress(), IL.getAddress(), 2437 Chunk, EUB); 2438 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 2439 LoopArguments, CGDispatchBounds); 2440 } 2441 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2442 EmitOMPSimdFinal(S, 2443 [&](CodeGenFunction &CGF) -> llvm::Value * { 2444 return CGF.Builder.CreateIsNotNull( 2445 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2446 }); 2447 } 2448 EmitOMPReductionClauseFinal( 2449 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 2450 ?
// Post-loop finalization: reduction finals/post-update, lastprivate finals
// and linear finals, each gated on IsLastIter (IL) != 0; then close the
// precondition guard.  emitForLoopBounds follows: the plain (non-combined)
// LB/UB helper-variable emitter.
/*Parallel and Simd*/ OMPD_parallel_for_simd 2451 : /*Parallel only*/ OMPD_parallel); 2452 // Emit post-update of the reduction variables if IsLastIter != 0. 2453 emitPostUpdateForReductionClause( 2454 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2455 return CGF.Builder.CreateIsNotNull( 2456 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2457 }); 2458 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2459 if (HasLastprivateClause) 2460 EmitOMPLastprivateClauseFinal( 2461 S, isOpenMPSimdDirective(S.getDirectiveKind()), 2462 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 2463 } 2464 EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2465 return CGF.Builder.CreateIsNotNull( 2466 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2467 }); 2468 // We're now done with the loop, so jump to the continuation block. 2469 if (ContBlock) { 2470 EmitBranch(ContBlock); 2471 EmitBlock(ContBlock, true); 2472 } 2473 } 2474 return HasLastprivateClause; 2475 } 2476 2477 /// The following two functions generate expressions for the loop lower 2478 /// and upper bounds in case of static and dynamic (dispatch) schedule 2479 /// of the associated 'for' or 'distribute' loop. 2480 static std::pair<LValue, LValue> 2481 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 2482 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2483 LValue LB = 2484 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2485 LValue UB = 2486 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2487 return {LB, UB}; 2488 } 2489 2490 /// When dealing with dispatch schedules (e.g.
dynamic, guided) we do not 2491 /// consider the lower and upper bound expressions generated by the 2492 /// worksharing loop support, but we use 0 and the iteration space size as 2493 /// constants 2494 static std::pair<llvm::Value *, llvm::Value *> 2495 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, 2496 Address LB, Address UB) { 2497 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2498 const Expr *IVExpr = LS.getIterationVariable(); 2499 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); 2500 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); 2501 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); 2502 return {LBVal, UBVal}; 2503 } 2504 2505 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 2506 bool HasLastprivates = false; 2507 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2508 PrePostActionTy &) { 2509 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 2510 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2511 emitForLoopBounds, 2512 emitDispatchForLoopBounds); 2513 }; 2514 { 2515 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2516 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 2517 S.hasCancel()); 2518 } 2519 2520 // Emit an implicit barrier at the end. 
2521 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2522 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2523 } 2524 } 2525 2526 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 2527 bool HasLastprivates = false; 2528 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2529 PrePostActionTy &) { 2530 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2531 emitForLoopBounds, 2532 emitDispatchForLoopBounds); 2533 }; 2534 { 2535 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2536 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2537 } 2538 2539 // Emit an implicit barrier at the end. 2540 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2541 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2542 } 2543 } 2544 2545 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 2546 const Twine &Name, 2547 llvm::Value *Init = nullptr) { 2548 auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 2549 if (Init) 2550 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); 2551 return LVal; 2552 } 2553 2554 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 2555 auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); 2556 auto *CS = dyn_cast<CompoundStmt>(Stmt); 2557 bool HasLastprivates = false; 2558 auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF, 2559 PrePostActionTy &) { 2560 auto &C = CGF.CGM.getContext(); 2561 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2562 // Emit helper vars inits. 2563 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 2564 CGF.Builder.getInt32(0)); 2565 auto *GlobalUBVal = CS != nullptr ? 
CGF.Builder.getInt32(CS->size() - 1) 2566 : CGF.Builder.getInt32(0); 2567 LValue UB = 2568 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 2569 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 2570 CGF.Builder.getInt32(1)); 2571 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 2572 CGF.Builder.getInt32(0)); 2573 // Loop counter. 2574 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 2575 OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2576 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 2577 OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2578 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 2579 // Generate condition for loop. 2580 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 2581 OK_Ordinary, S.getLocStart(), FPOptions()); 2582 // Increment for loop counter. 2583 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 2584 S.getLocStart()); 2585 auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) { 2586 // Iterate through all sections and emit a switch construct: 2587 // switch (IV) { 2588 // case 0: 2589 // <SectionStmt[0]>; 2590 // break; 2591 // ... 2592 // case <NumSection> - 1: 2593 // <SectionStmt[<NumSection> - 1]>; 2594 // break; 2595 // } 2596 // .omp.sections.exit: 2597 auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 2598 auto *SwitchStmt = CGF.Builder.CreateSwitch( 2599 CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, 2600 CS == nullptr ? 
1 : CS->size()); 2601 if (CS) { 2602 unsigned CaseNumber = 0; 2603 for (auto *SubStmt : CS->children()) { 2604 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2605 CGF.EmitBlock(CaseBB); 2606 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 2607 CGF.EmitStmt(SubStmt); 2608 CGF.EmitBranch(ExitBB); 2609 ++CaseNumber; 2610 } 2611 } else { 2612 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2613 CGF.EmitBlock(CaseBB); 2614 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 2615 CGF.EmitStmt(Stmt); 2616 CGF.EmitBranch(ExitBB); 2617 } 2618 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 2619 }; 2620 2621 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2622 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 2623 // Emit implicit barrier to synchronize threads and avoid data races on 2624 // initialization of firstprivate variables and post-update of lastprivate 2625 // variables. 2626 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 2627 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2628 /*ForceSimpleCall=*/true); 2629 } 2630 CGF.EmitOMPPrivateClause(S, LoopScope); 2631 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2632 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2633 (void)LoopScope.Privatize(); 2634 2635 // Emit static non-chunked loop. 
2636 OpenMPScheduleTy ScheduleKind; 2637 ScheduleKind.Schedule = OMPC_SCHEDULE_static; 2638 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 2639 CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32, 2640 /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(), 2641 UB.getAddress(), ST.getAddress()); 2642 // UB = min(UB, GlobalUB); 2643 auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); 2644 auto *MinUBGlobalUB = CGF.Builder.CreateSelect( 2645 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 2646 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 2647 // IV = LB; 2648 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); 2649 // while (idx <= UB) { BODY; ++idx; } 2650 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, 2651 [](CodeGenFunction &) {}); 2652 // Tell the runtime we are done. 2653 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2654 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 2655 }; 2656 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); 2657 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 2658 // Emit post-update of the reduction variables if IsLastIter != 0. 2659 emitPostUpdateForReductionClause( 2660 CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2661 return CGF.Builder.CreateIsNotNull( 2662 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2663 }); 2664 2665 // Emit final copy of the lastprivate variables if IsLastIter != 0. 
2666 if (HasLastprivates) 2667 CGF.EmitOMPLastprivateClauseFinal( 2668 S, /*NoFinals=*/false, 2669 CGF.Builder.CreateIsNotNull( 2670 CGF.EmitLoadOfScalar(IL, S.getLocStart()))); 2671 }; 2672 2673 bool HasCancel = false; 2674 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 2675 HasCancel = OSD->hasCancel(); 2676 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 2677 HasCancel = OPSD->hasCancel(); 2678 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); 2679 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 2680 HasCancel); 2681 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 2682 // clause. Otherwise the barrier will be generated by the codegen for the 2683 // directive. 2684 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 2685 // Emit implicit barrier to synchronize threads and avoid data races on 2686 // initialization of firstprivate variables. 2687 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2688 OMPD_unknown); 2689 } 2690 } 2691 2692 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 2693 { 2694 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2695 EmitSections(S); 2696 } 2697 // Emit an implicit barrier at the end. 
2698 if (!S.getSingleClause<OMPNowaitClause>()) { 2699 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2700 OMPD_sections); 2701 } 2702 } 2703 2704 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 2705 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2706 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2707 }; 2708 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2709 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 2710 S.hasCancel()); 2711 } 2712 2713 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 2714 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 2715 llvm::SmallVector<const Expr *, 8> DestExprs; 2716 llvm::SmallVector<const Expr *, 8> SrcExprs; 2717 llvm::SmallVector<const Expr *, 8> AssignmentOps; 2718 // Check if there are any 'copyprivate' clauses associated with this 2719 // 'single' construct. 2720 // Build a list of copyprivate variables along with helper expressions 2721 // (<source>, <destination>, <destination>=<source> expressions) 2722 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 2723 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 2724 DestExprs.append(C->destination_exprs().begin(), 2725 C->destination_exprs().end()); 2726 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 2727 AssignmentOps.append(C->assignment_ops().begin(), 2728 C->assignment_ops().end()); 2729 } 2730 // Emit code for 'single' region along with 'copyprivate' clauses 2731 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2732 Action.Enter(CGF); 2733 OMPPrivateScope SingleScope(CGF); 2734 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 2735 CGF.EmitOMPPrivateClause(S, SingleScope); 2736 (void)SingleScope.Privatize(); 2737 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2738 }; 2739 { 2740 OMPLexicalScope Scope(*this, S, 
/*AsInlined=*/true); 2741 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 2742 CopyprivateVars, DestExprs, 2743 SrcExprs, AssignmentOps); 2744 } 2745 // Emit an implicit barrier at the end (to avoid data race on firstprivate 2746 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 2747 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 2748 CGM.getOpenMPRuntime().emitBarrierCall( 2749 *this, S.getLocStart(), 2750 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); 2751 } 2752 } 2753 2754 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 2755 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2756 Action.Enter(CGF); 2757 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2758 }; 2759 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2760 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 2761 } 2762 2763 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 2764 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2765 Action.Enter(CGF); 2766 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2767 }; 2768 Expr *Hint = nullptr; 2769 if (auto *HintClause = S.getSingleClause<OMPHintClause>()) 2770 Hint = HintClause->getHint(); 2771 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2772 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 2773 S.getDirectiveName().getAsString(), 2774 CodeGen, S.getLocStart(), Hint); 2775 } 2776 2777 void CodeGenFunction::EmitOMPParallelForDirective( 2778 const OMPParallelForDirective &S) { 2779 // Emit directive as a combined directive that consists of two implicit 2780 // directives: 'parallel' with 'for' directive. 
2781 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2782 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); 2783 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2784 emitDispatchForLoopBounds); 2785 }; 2786 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, 2787 emitEmptyBoundParameters); 2788 } 2789 2790 void CodeGenFunction::EmitOMPParallelForSimdDirective( 2791 const OMPParallelForSimdDirective &S) { 2792 // Emit directive as a combined directive that consists of two implicit 2793 // directives: 'parallel' with 'for' directive. 2794 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2795 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2796 emitDispatchForLoopBounds); 2797 }; 2798 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, 2799 emitEmptyBoundParameters); 2800 } 2801 2802 void CodeGenFunction::EmitOMPParallelSectionsDirective( 2803 const OMPParallelSectionsDirective &S) { 2804 // Emit directive as a combined directive that consists of two implicit 2805 // directives: 'parallel' with 'sections' directive. 2806 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2807 CGF.EmitSections(S); 2808 }; 2809 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, 2810 emitEmptyBoundParameters); 2811 } 2812 2813 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, 2814 const RegionCodeGenTy &BodyGen, 2815 const TaskGenTy &TaskGen, 2816 OMPTaskDataTy &Data) { 2817 // Emit outlined function for task construct. 
2818 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2819 auto *I = CS->getCapturedDecl()->param_begin(); 2820 auto *PartId = std::next(I); 2821 auto *TaskT = std::next(I, 4); 2822 // Check if the task is final 2823 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 2824 // If the condition constant folds and can be elided, try to avoid emitting 2825 // the condition and the dead arm of the if/else. 2826 auto *Cond = Clause->getCondition(); 2827 bool CondConstant; 2828 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 2829 Data.Final.setInt(CondConstant); 2830 else 2831 Data.Final.setPointer(EvaluateExprAsBool(Cond)); 2832 } else { 2833 // By default the task is not final. 2834 Data.Final.setInt(/*IntVal=*/false); 2835 } 2836 // Check if the task has 'priority' clause. 2837 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 2838 auto *Prio = Clause->getPriority(); 2839 Data.Priority.setInt(/*IntVal=*/true); 2840 Data.Priority.setPointer(EmitScalarConversion( 2841 EmitScalarExpr(Prio), Prio->getType(), 2842 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 2843 Prio->getExprLoc())); 2844 } 2845 // The first function argument for tasks is a thread id, the second one is a 2846 // part id (0 for tied tasks, >=0 for untied task). 2847 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 2848 // Get list of private variables. 2849 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 2850 auto IRef = C->varlist_begin(); 2851 for (auto *IInit : C->private_copies()) { 2852 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2853 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2854 Data.PrivateVars.push_back(*IRef); 2855 Data.PrivateCopies.push_back(IInit); 2856 } 2857 ++IRef; 2858 } 2859 } 2860 EmittedAsPrivate.clear(); 2861 // Get list of firstprivate variables. 
2862 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 2863 auto IRef = C->varlist_begin(); 2864 auto IElemInitRef = C->inits().begin(); 2865 for (auto *IInit : C->private_copies()) { 2866 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2867 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2868 Data.FirstprivateVars.push_back(*IRef); 2869 Data.FirstprivateCopies.push_back(IInit); 2870 Data.FirstprivateInits.push_back(*IElemInitRef); 2871 } 2872 ++IRef; 2873 ++IElemInitRef; 2874 } 2875 } 2876 // Get list of lastprivate variables (for taskloops). 2877 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 2878 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 2879 auto IRef = C->varlist_begin(); 2880 auto ID = C->destination_exprs().begin(); 2881 for (auto *IInit : C->private_copies()) { 2882 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2883 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2884 Data.LastprivateVars.push_back(*IRef); 2885 Data.LastprivateCopies.push_back(IInit); 2886 } 2887 LastprivateDstsOrigs.insert( 2888 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 2889 cast<DeclRefExpr>(*IRef)}); 2890 ++IRef; 2891 ++ID; 2892 } 2893 } 2894 // Build list of dependences. 2895 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) 2896 for (auto *IRef : C->varlists()) 2897 Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); 2898 auto &&CodeGen = [&Data, CS, &BodyGen, &LastprivateDstsOrigs]( 2899 CodeGenFunction &CGF, PrePostActionTy &Action) { 2900 // Set proper addresses for generated private copies. 
2901 OMPPrivateScope Scope(CGF); 2902 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || 2903 !Data.LastprivateVars.empty()) { 2904 auto *CopyFn = CGF.Builder.CreateLoad( 2905 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); 2906 auto *PrivatesPtr = CGF.Builder.CreateLoad( 2907 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); 2908 // Map privates. 2909 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 2910 llvm::SmallVector<llvm::Value *, 16> CallArgs; 2911 CallArgs.push_back(PrivatesPtr); 2912 for (auto *E : Data.PrivateVars) { 2913 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2914 Address PrivatePtr = CGF.CreateMemTemp( 2915 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); 2916 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2917 CallArgs.push_back(PrivatePtr.getPointer()); 2918 } 2919 for (auto *E : Data.FirstprivateVars) { 2920 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2921 Address PrivatePtr = 2922 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 2923 ".firstpriv.ptr.addr"); 2924 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2925 CallArgs.push_back(PrivatePtr.getPointer()); 2926 } 2927 for (auto *E : Data.LastprivateVars) { 2928 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2929 Address PrivatePtr = 2930 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 2931 ".lastpriv.ptr.addr"); 2932 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2933 CallArgs.push_back(PrivatePtr.getPointer()); 2934 } 2935 CGF.EmitRuntimeCall(CopyFn, CallArgs); 2936 for (auto &&Pair : LastprivateDstsOrigs) { 2937 auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); 2938 DeclRefExpr DRE( 2939 const_cast<VarDecl *>(OrigVD), 2940 /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup( 2941 OrigVD) != nullptr, 2942 Pair.second->getType(), VK_LValue, Pair.second->getExprLoc()); 2943 
Scope.addPrivate(Pair.first, [&CGF, &DRE]() { 2944 return CGF.EmitLValue(&DRE).getAddress(); 2945 }); 2946 } 2947 for (auto &&Pair : PrivatePtrs) { 2948 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 2949 CGF.getContext().getDeclAlign(Pair.first)); 2950 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 2951 } 2952 } 2953 (void)Scope.Privatize(); 2954 2955 Action.Enter(CGF); 2956 BodyGen(CGF); 2957 }; 2958 auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 2959 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, 2960 Data.NumberOfParts); 2961 OMPLexicalScope Scope(*this, S); 2962 TaskGen(*this, OutlinedFn, Data); 2963 } 2964 2965 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 2966 // Emit outlined function for task construct. 2967 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2968 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 2969 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 2970 const Expr *IfCond = nullptr; 2971 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2972 if (C->getNameModifier() == OMPD_unknown || 2973 C->getNameModifier() == OMPD_task) { 2974 IfCond = C->getCondition(); 2975 break; 2976 } 2977 } 2978 2979 OMPTaskDataTy Data; 2980 // Check if we should emit tied or untied task. 
2981 Data.Tied = !S.getSingleClause<OMPUntiedClause>(); 2982 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 2983 CGF.EmitStmt(CS->getCapturedStmt()); 2984 }; 2985 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 2986 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 2987 const OMPTaskDataTy &Data) { 2988 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn, 2989 SharedsTy, CapturedStruct, IfCond, 2990 Data); 2991 }; 2992 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 2993 } 2994 2995 void CodeGenFunction::EmitOMPTaskyieldDirective( 2996 const OMPTaskyieldDirective &S) { 2997 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); 2998 } 2999 3000 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 3001 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); 3002 } 3003 3004 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 3005 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart()); 3006 } 3007 3008 void CodeGenFunction::EmitOMPTaskgroupDirective( 3009 const OMPTaskgroupDirective &S) { 3010 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3011 Action.Enter(CGF); 3012 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3013 }; 3014 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3015 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); 3016 } 3017 3018 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 3019 CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { 3020 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) { 3021 return llvm::makeArrayRef(FlushClause->varlist_begin(), 3022 FlushClause->varlist_end()); 3023 } 3024 return llvm::None; 3025 }(), S.getLocStart()); 3026 } 3027 3028 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, 3029 const CodeGenLoopTy 
&CodeGenLoop, 3030 Expr *IncExpr) { 3031 // Emit the loop iteration variable. 3032 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 3033 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 3034 EmitVarDecl(*IVDecl); 3035 3036 // Emit the iterations count variable. 3037 // If it is not a variable, Sema decided to calculate iterations count on each 3038 // iteration (e.g., it is foldable into a constant). 3039 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 3040 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 3041 // Emit calculation of the iterations count. 3042 EmitIgnoredExpr(S.getCalcLastIteration()); 3043 } 3044 3045 auto &RT = CGM.getOpenMPRuntime(); 3046 3047 bool HasLastprivateClause = false; 3048 // Check pre-condition. 3049 { 3050 OMPLoopScope PreInitScope(*this, S); 3051 // Skip the entire loop if we don't meet the precondition. 3052 // If the condition constant folds and can be elided, avoid emitting the 3053 // whole loop. 3054 bool CondConstant; 3055 llvm::BasicBlock *ContBlock = nullptr; 3056 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 3057 if (!CondConstant) 3058 return; 3059 } else { 3060 auto *ThenBlock = createBasicBlock("omp.precond.then"); 3061 ContBlock = createBasicBlock("omp.precond.end"); 3062 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 3063 getProfileCount(&S)); 3064 EmitBlock(ThenBlock); 3065 incrementProfileCounter(&S); 3066 } 3067 3068 // Emit 'then' code. 3069 { 3070 // Emit helper vars inits. 3071 3072 LValue LB = EmitOMPHelperVar( 3073 *this, cast<DeclRefExpr>( 3074 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3075 ? S.getCombinedLowerBoundVariable() 3076 : S.getLowerBoundVariable()))); 3077 LValue UB = EmitOMPHelperVar( 3078 *this, cast<DeclRefExpr>( 3079 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3080 ? 
S.getCombinedUpperBoundVariable() 3081 : S.getUpperBoundVariable()))); 3082 LValue ST = 3083 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 3084 LValue IL = 3085 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 3086 3087 OMPPrivateScope LoopScope(*this); 3088 if (EmitOMPFirstprivateClause(S, LoopScope)) { 3089 // Emit implicit barrier to synchronize threads and avoid data races on 3090 // initialization of firstprivate variables and post-update of 3091 // lastprivate variables. 3092 CGM.getOpenMPRuntime().emitBarrierCall( 3093 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 3094 /*ForceSimpleCall=*/true); 3095 } 3096 EmitOMPPrivateClause(S, LoopScope); 3097 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 3098 EmitOMPPrivateLoopCounters(S, LoopScope); 3099 (void)LoopScope.Privatize(); 3100 3101 // Detect the distribute schedule kind and chunk. 3102 llvm::Value *Chunk = nullptr; 3103 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; 3104 if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) { 3105 ScheduleKind = C->getDistScheduleKind(); 3106 if (const auto *Ch = C->getChunkSize()) { 3107 Chunk = EmitScalarExpr(Ch); 3108 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 3109 S.getIterationVariable()->getType(), 3110 S.getLocStart()); 3111 } 3112 } 3113 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 3114 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 3115 3116 // OpenMP [2.10.8, distribute Construct, Description] 3117 // If dist_schedule is specified, kind must be static. If specified, 3118 // iterations are divided into chunks of size chunk_size, chunks are 3119 // assigned to the teams of the league in a round-robin fashion in the 3120 // order of the team number. 
When no chunk_size is specified, the 3121 // iteration space is divided into chunks that are approximately equal 3122 // in size, and at most one chunk is distributed to each team of the 3123 // league. The size of the chunks is unspecified in this case. 3124 if (RT.isStaticNonchunked(ScheduleKind, 3125 /* Chunked */ Chunk != nullptr)) { 3126 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, 3127 IVSize, IVSigned, /* Ordered = */ false, 3128 IL.getAddress(), LB.getAddress(), 3129 UB.getAddress(), ST.getAddress()); 3130 auto LoopExit = 3131 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 3132 // UB = min(UB, GlobalUB); 3133 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3134 ? S.getCombinedEnsureUpperBound() 3135 : S.getEnsureUpperBound()); 3136 // IV = LB; 3137 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3138 ? S.getCombinedInit() 3139 : S.getInit()); 3140 3141 Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3142 ? S.getCombinedCond() 3143 : S.getCond(); 3144 3145 // for distribute alone, codegen 3146 // while (idx <= UB) { BODY; ++idx; } 3147 // when combined with 'for' (e.g. as in 'distribute parallel for') 3148 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 3149 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr, 3150 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 3151 CodeGenLoop(CGF, S, LoopExit); 3152 }, 3153 [](CodeGenFunction &) {}); 3154 EmitBlock(LoopExit.getBlock()); 3155 // Tell the runtime we are done. 3156 RT.emitForStaticFinish(*this, S.getLocStart()); 3157 } else { 3158 // Emit the outer loop, which requests its work chunk [LB..UB] from 3159 // runtime and runs the inner loop to process it. 
3160 const OMPLoopArguments LoopArguments = { 3161 LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), 3162 Chunk}; 3163 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, 3164 CodeGenLoop); 3165 } 3166 3167 // Emit final copy of the lastprivate variables if IsLastIter != 0. 3168 if (HasLastprivateClause) 3169 EmitOMPLastprivateClauseFinal( 3170 S, /*NoFinals=*/false, 3171 Builder.CreateIsNotNull( 3172 EmitLoadOfScalar(IL, S.getLocStart()))); 3173 } 3174 3175 // We're now done with the loop, so jump to the continuation block. 3176 if (ContBlock) { 3177 EmitBranch(ContBlock); 3178 EmitBlock(ContBlock, true); 3179 } 3180 } 3181 } 3182 3183 void CodeGenFunction::EmitOMPDistributeDirective( 3184 const OMPDistributeDirective &S) { 3185 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3186 3187 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 3188 }; 3189 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3190 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 3191 false); 3192 } 3193 3194 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 3195 const CapturedStmt *S) { 3196 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 3197 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 3198 CGF.CapturedStmtInfo = &CapStmtInfo; 3199 auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); 3200 Fn->addFnAttr(llvm::Attribute::NoInline); 3201 return Fn; 3202 } 3203 3204 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 3205 if (!S.getAssociatedStmt()) { 3206 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) 3207 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); 3208 return; 3209 } 3210 auto *C = S.getSingleClause<OMPSIMDClause>(); 3211 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, 3212 PrePostActionTy &Action) { 3213 if (C) { 3214 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 3215 
llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3216 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3217 auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); 3218 CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); 3219 } else { 3220 Action.Enter(CGF); 3221 CGF.EmitStmt( 3222 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3223 } 3224 }; 3225 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3226 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); 3227 } 3228 3229 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 3230 QualType SrcType, QualType DestType, 3231 SourceLocation Loc) { 3232 assert(CGF.hasScalarEvaluationKind(DestType) && 3233 "DestType must have scalar evaluation kind."); 3234 assert(!Val.isAggregate() && "Must be a scalar or complex."); 3235 return Val.isScalar() 3236 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, 3237 Loc) 3238 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 3239 DestType, Loc); 3240 } 3241 3242 static CodeGenFunction::ComplexPairTy 3243 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 3244 QualType DestType, SourceLocation Loc) { 3245 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 3246 "DestType must have complex evaluation kind."); 3247 CodeGenFunction::ComplexPairTy ComplexVal; 3248 if (Val.isScalar()) { 3249 // Convert the input element to the element type of the complex. 
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                              DestElementType, Loc);
    // Real part is the converted scalar; imaginary part is zero.
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    // Convert real and imaginary parts element-wise.
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

/// Store \p RVal to \p LVal for an atomic write. Global-register lvalues
/// cannot be accessed atomically, so they get a plain register store.
static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  } else {
    CGF.EmitAtomicStore(RVal, LVal,
                        IsSeqCst ?
                            llvm::AtomicOrdering::SequentiallyConsistent
                                 : llvm::AtomicOrdering::Monotonic,
                        LVal.isVolatile(), /*IsInit=*/false);
  }
}

// Store \p RVal (of type \p RValTy) to \p LVal, converting it to the
// destination's evaluation kind first. Aggregates are not valid here.
void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

/// Emit 'v = x;' for '#pragma omp atomic read'.
static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  // Global registers cannot be loaded atomically; fall back to a plain load.
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(
                         XLValue, Loc,
                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                  : llvm::AtomicOrdering::Monotonic,
                         XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
}

/// Emit 'x = expr;' for '#pragma omp atomic write'.
static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

/// Try to lower an atomic update to a single 'atomicrmw' instruction.
/// Returns {true, old value of 'x'} on success, or {false, nullptr} when the
/// caller must fall back to a compare-and-swap loop.
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
  // expression is simple and atomic is allowed for the given type for the
  // target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    // 'x = expr - x' is not a plain RMW subtraction; fall back to CAS.
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    // Map the comparison to min/max RMW ops, depending on signedness of 'x'
    // and on which side of the comparison 'x' appears.
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ?
                                     llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  // Operations without a matching atomicrmw instruction: signal the caller
  // to use the compare-and-swap fallback.
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  // Operators that can never appear in a valid atomic update expression.
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    // Cast constant updates to the in-memory width of 'x'.
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

/// Emit '#pragma omp atomic update' for the update expression \p UE.
static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  // Which opaque operand stands for the old value of 'x' and which for 'expr'
  // depends on the side of the binary operator 'x' appears on.
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ?
                                      RHS : LHS;
  // Re-emit the update expression with the opaque operands bound to concrete
  // values; used by the CAS fallback inside EmitOMPAtomicSimpleUpdateExpr.
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

/// Convert \p Value to \p ResType according to the destination's evaluation
/// kind (scalar or complex; aggregates are invalid here).
static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

/// Emit '#pragma omp atomic capture': update (or overwrite) 'x' and store its
/// old or new value (depending on \p IsPostfixUpdate) into 'v'.
static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ?
                      llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; - > expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      // Capture the old value of 'x' for postfix forms, the new one otherwise.
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

/// Dispatch the atomic-construct kind (read/write/update/capture) to the
/// corresponding emitter; any other clause kind is invalid on 'omp atomic'.
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    // No clause at all defaults to 'update'.
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case
       OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find first clause (skip seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
    enterFullExpression(EWC);
  }
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    // 'atomic capture' may carry a compound statement; enter the cleanup
    // scope of each subexpression that has cleanups.
    for (const auto *C : Compound->body()) {
      if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
        enterFullExpression(EWC);
      }
    }
  }

  auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.EmitStopPoint(CS);
    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
                      S.getV(), S.getExpr(), S.getUpdateExpr(),
                      S.isXLHSInRHSPart(), S.getLocStart());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}

/// Common emission for target execution directives: outline the region for
/// the device and emit the host-side target call.
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;
  const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());

  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the at most one if clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
    Device = C->getDevice();
  }

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        CapturedVars);
}

/// Emit the body of a 'target' region after privatizing its first-/private
/// clause variables.
static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();

  Action.Enter(CGF);
  CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

/// Common emission for 'teams'-based directives: outline the region, emit
/// num_teams/thread_limit if present, then emit the teams runtime call.
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
  const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
    Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getLocStart());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
                                           CapturedVars);
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
  // No extra post-update step is required after the reductions.
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

/// Emit the 'teams' part of a 'target teams' directive.
static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  // Use the condition of the first if clause that applies to 'cancel'.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  // Cancellation in (target) parallel/task regions exits the whole outlined
  // function; the loop/sections constructs branch to the cancel exit block.
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for);
  return OMPCancelStack.getExitBlock();
}

// Privatize each use_device_ptr list item, redirecting it to the device
// address provided by the runtime in \p CaptureDeviceAddrMap.
void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  // The three lists (original vars, inits, private copies) run in parallel.
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (auto PvtVarIt : C.private_copies()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privatize fields of the current
      // structure.
      auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it will be initialized by the value of
      // the declaration we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable reached its purpose in the emission of
      // the previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}

// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device pointer.
  // This action can be replaced by the OpenMP runtime code generation to
  // deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else
        RCG(CGF);
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Notwithstanding the body of the region is emitted as inlined directive,
    // we don't use an inline scope as changes in the references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

/// Emit the 'parallel' part of a 'target parallel' directive.
static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
4084 auto *CS = S.getCapturedStmt(OMPD_parallel); 4085 Action.Enter(CGF); 4086 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) { 4087 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4088 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4089 CGF.EmitOMPPrivateClause(S, PrivateScope); 4090 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4091 (void)PrivateScope.Privatize(); 4092 // TODO: Add support for clauses. 4093 CGF.EmitStmt(CS->getCapturedStmt()); 4094 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 4095 }; 4096 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, 4097 emitEmptyBoundParameters); 4098 emitPostUpdateForReductionClause( 4099 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 4100 } 4101 4102 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 4103 CodeGenModule &CGM, StringRef ParentName, 4104 const OMPTargetParallelDirective &S) { 4105 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4106 emitTargetParallelRegion(CGF, S, Action); 4107 }; 4108 llvm::Function *Fn; 4109 llvm::Constant *Addr; 4110 // Emit target region as a standalone region. 4111 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4112 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4113 assert(Fn && Addr && "Target device function emission failed."); 4114 } 4115 4116 void CodeGenFunction::EmitOMPTargetParallelDirective( 4117 const OMPTargetParallelDirective &S) { 4118 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4119 emitTargetParallelRegion(CGF, S, Action); 4120 }; 4121 emitCommonOMPTargetDirective(*this, S, CodeGen); 4122 } 4123 4124 void CodeGenFunction::EmitOMPTargetParallelForDirective( 4125 const OMPTargetParallelForDirective &S) { 4126 // TODO: codegen for target parallel for. 4127 } 4128 4129 /// Emit a helper variable and return corresponding lvalue. 
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  // Redirect the helper variable (e.g. a taskloop bound) to the outlined
  // function's parameter PVD: after Privatize(), references to the helper
  // resolve to PVD's local address.
  auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(
      VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
}

/// Emit code for taskloop-based directives ('taskloop', 'taskloop simd').
/// Builds the task data (nogroup, tied, grainsize/num_tasks schedule), a
/// BodyGen lambda that emits the precondition-guarded inner loop, and a
/// TaskGen lambda that issues the runtime taskloop call; both are handed to
/// EmitOMPTaskBasedDirective for the actual task outlining.
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Pick the first 'if' clause that applies to taskloop (unmodified or with
  // the 'taskloop' name modifier).
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  // (Non-null clause pointer converts to true.)
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop. The pointer/int pair encodes which of the
  // two mutually exclusive clauses was seen: IntVal=false for grainsize,
  // IntVal=true for num_tasks.
  if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // 'taskloop simd' additionally gets the simd loop metadata/init.
    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    // Offsets of the bound/stride/last-iteration parameters within the
    // captured declaration's parameter list. NOTE(review): assumes the first
    // five params are the standard captured-statement arguments — confirm
    // against the task outlining code if the capture layout changes.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  // TaskGen wraps the runtime taskloop call in an inlined 'taskloop'
  // directive region; the loop pre-init scope is re-entered so captured
  // pre-init declarations are in scope for the call arguments.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}

// Emit code for '#pragma omp taskloop'.
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

// Emit code for '#pragma omp taskloop simd'.
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}