//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                  bool AsInlined = false, bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (AsInlined) {
      if (S.hasAssociatedStmt()) {
        auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
        for (auto &C : CS->captures()) {
          if (C.capturesVariable() || C.capturesVariableByCopy()) {
            auto *VD = C.getCapturedVar();
            DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                            isCapturedVar(CGF, VD) ||
                                (CGF.CapturedStmtInfo &&
                                 InlinedShareds.isGlobalVarCaptured(VD)),
                            VD->getType().getNonReferenceType(), VK_LValue,
                            SourceLocation());
            InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
              return CGF.EmitLValue(&DRE).getAddress();
            });
          }
        }
        (void)InlinedShareds.Privatize();
      }
    }
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};
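// As an illustrative sketch of what emitPreInitStmt() above handles: a clause
// expression such as 'num_threads(x + y)' is captured by Sema into a pre-init
// declaration, so
//
//   #pragma omp parallel num_threads(x + y)
//
// behaves roughly as if written
//
//   int .captured. = x + y;   // evaluated once, before the region
//   #pragma omp parallel num_threads(.captured.)
//
// (the name is illustrative only).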
/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of used expressions from the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    if (auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

} // namespace

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV =
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), SourceLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), SourceLocation());
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references we need to return the address of the
  // reference instead of the reference to the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
  }

  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType()) {
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  }
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  return C.getCanonicalParamType(T);
}
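// A rough sketch of the outlined function built below for
//
//   #pragma omp parallel firstprivate(a) shared(b)
//
// (parameter names are illustrative only):
//
//   void .omp_outlined.(i32 *.global_tid., i32 *.bound_tid.,
//                       i64 a /* uintptr-encoded copy */, i32 *b);
//
// Captures by copy whose type is not a pointer are passed as uintptr values,
// captures by reference as pointers; VLA sizes travel the same way.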
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  const RecordDecl *RD = S.getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  // Build the argument list.
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList Args;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. We can pass the VLA type sizes to the outlined
    // function in the same way.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType())
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &getContext().Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &getContext().Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType()) {
      ArgType =
          getCanonicalParamType(getContext(), ArgType.getNonReferenceType());
    }
    Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
                                             FD->getLocation(), II, ArgType));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F = llvm::Function::Create(
      FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
      CapturedStmtInfo->getHelperName(), &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // Generate the function.
  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    // If we are capturing a pointer by copy we don't need to do anything; just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      Address LocalAddr = GetAddrOfLocalVar(Args[Cnt]);
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CreateMemTemp(CurVD->getType(), getPointerAlign(),
                                        ".materialized_ref");
        EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, /*Volatile=*/false,
                          CurVD->getType());
        LocalAddr = RefAddr;
      }
      setAddrOfLocalVar(CurVD, LocalAddr);
      ++Cnt;
      ++I;
      continue;
    }

    LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
    LValue ArgLVal =
        MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
                       BaseInfo);
    if (FD->hasCapturedVLAType()) {
      LValue CastedArgLVal =
          MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
                                              Args[Cnt]->getName(), ArgLVal),
                         FD->getType(), BaseInfo);
      auto *ExprArg =
          EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizeMap[VAT->getSizeExpr()] = ExprArg;
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = EmitLoadOfReference(
              ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
        } else if (!VarTy->isVariablyModifiedType() ||
                   !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      setAddrOfLocalVar(
          Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(),
                                                  Args[Cnt]->getName(), ArgLVal,
                                                  VarTy->isReferenceType()));
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue =
          EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
    }
    ++Cnt;
    ++I;
  }

  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());

  return F;
}

//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
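// A rough sketch of the control flow EmitOMPAggregateAssign produces (the
// names match the basic blocks created below):
//
//   if (dest.begin == dest.end) goto omp.arraycpy.done;  // isempty check
//   omp.arraycpy.body:
//     CopyGen(dest.element, src.element);
//     ++dest.element; ++src.element;
//     if (dest.element != dest.end) goto omp.arraycpy.body;
//   omp.arraycpy.done: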
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
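// For user-defined reductions ('#pragma omp declare reduction'), Sema models
// the combiner as a call through an OpaqueValueExpr whose source expression
// refers to the OMPDeclareReductionDecl, e.g. (sketch):
//
//   #pragma omp declare reduction(foo : int : omp_out += omp_in)
//
// The helpers below recognize that pattern and recover the declaration.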
/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for the reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    auto *CE = cast<CallExpr>(InitOp);
    auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() -> Address { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() -> Address { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, ".init");
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
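// Note: the branch taken above depends on the 'initializer' clause of the
// 'declare reduction' construct; a sketch:
//
//   #pragma omp declare reduction(                                    \
//       mymin : int : omp_out = omp_in < omp_out ? omp_in : omp_out)  \
//       initializer(omp_priv = INT_MAX)
//
// uses the user-supplied initializer, while omitting 'initializer(...)' takes
// the null-constant path.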
/// \brief Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, const Expr *Init,
                                 Address SrcAddr = Address::invalid()) {
  auto *DRD = getReductionInit(Init);
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (DRD && (DRD->getInitializer() || !Init)) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
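// EmitOMPCopy below dispatches on the shape of the copy expression: a plain
// assignment of an array type becomes a single aggregate memcpy, while
// anything else (e.g. a copy-assignment operator of a class element type) is
// emitted element by element through EmitOMPAggregateAssign.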
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types, perform element-by-element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with a single array element, so we have to remap the
            // destination and source variables to the corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *CapFD = CapturesInfo.lookup(OrigVD);
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init. Remap the temp VDInit
            // variable to the address of the original variable (for proper
            // handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}
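// An illustrative use of the copyin machinery implemented below:
//
//   int tp;
//   #pragma omp threadprivate(tp)
//   ...
//   #pragma omp parallel copyin(tp)
//
// Each non-master thread copies the master's 'tp' into its own threadprivate
// instance before the region body runs; the master thread skips the copy.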
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If it
          // is, there is no need to copy the data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for the future update at the
      // end of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Task loops do not require additional initialization; it is done by the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [&]() -> Address {
                // Emit private VarDecl with copy init.
                EmitDecl(*VD);
                return GetAddrOfLocalVar(VD);
              });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLV.getPointer()->getType();
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLV.getAlignment());
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (auto *PtrTy = BaseTy->getAs<PointerType>())
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    else {
      BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
                                             BaseTy->castAs<ReferenceType>());
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      Address(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
          BaseLV.getAlignment()),
      BaseLV.getType(), BaseLV.getBaseInfo());
}
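// EmitOMPReductionClauseInit below also covers reductions over array elements
// and array sections, e.g. (illustrative only):
//
//   #pragma omp parallel for reduction(+ : a[lb:len], b[i])
//
// For a section, a private array of the section's length is allocated and the
// base pointer is rebased onto it via castToBase()/loadToBegin() above, so
// that in-region accesses through the original base hit the private copy.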
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    for (auto IRef : C->varlists()) {
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
      auto *DRD = getReductionInit(*IRed);
      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
          Base = TempOASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
        auto OASELValueUB =
            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue =
            loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
                        OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [OASELValueLB]() -> Address {
          return OASELValueLB.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
                     OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit VarDecl with copy init for arrays.
              // Get the address of the original variable captured in current
              // captured region.
              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
                                                 OASELValueLB.getPointer());
              Size = Builder.CreateNUWAdd(
                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(Size));
              EmitVariablyModifiedType(PrivateVD->getType());
              auto Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              auto *Init = PrivateVD->getInit();
              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                   DRD ? *IRed : Init,
                                   OASELValueLB.getAddress());
              EmitAutoVarCleanups(Emission);
              // Emit private VarDecl with reduction init.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   OASELValueLB.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(),
                                OASELValueLB.getType(), OriginalBaseLValue,
                                Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto ASELValue = EmitLValue(ASE);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue = loadToBegin(
            *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(
            LHSVD, [ASELValue]() -> Address { return ASELValue.getAddress(); });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
                     OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit private VarDecl with reduction init.
              AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                 ASELValue.getAddress(),
                                                 ASELValue.getType());
              } else
                EmitAutoVarInit(Emission);
              EmitAutoVarCleanups(Emission);
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   ASELValue.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
                                OriginalBaseLValue, Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
          return Builder.CreateElementBitCast(
              GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
              "rhs.begin");
        });
      } else {
        auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        if (getContext().getAsArrayType(Type)) {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          IRef->getType(), VK_LValue, IRef->getExprLoc());
          Address OriginalAddr = EmitLValue(&DRE).getAddress();
          PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
                                          LHSVD]() -> Address {
            OriginalAddr = Builder.CreateElementBitCast(
                OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
            return OriginalAddr;
          });
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            if (Type->isVariablyModifiedType()) {
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(
                      getTypeSize(OrigVD->getType().getNonReferenceType())));
              EmitVariablyModifiedType(Type);
            }
            auto Emission = EmitAutoVarAlloca(*PrivateVD);
            auto Addr = Emission.getAllocatedAddress();
            auto *Init = PrivateVD->getInit();
            EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                 DRD ? *IRed : Init, OriginalAddr);
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
            return Builder.CreateElementBitCast(
                GetAddrOfLocalVar(PrivateVD),
                ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
          });
        } else {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          Address OriginalAddr = Address::invalid();
          PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
                                          &OriginalAddr]() -> Address {
            DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                            CapturedStmtInfo->lookup(OrigVD) != nullptr,
                            IRef->getType(), VK_LValue, IRef->getExprLoc());
            OriginalAddr = EmitLValue(&DRE).getAddress();
            return OriginalAddr;
          });
          // Emit reduction copy.
          bool IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
                // Emit private VarDecl with reduction init.
                AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
                auto Addr = Emission.getAllocatedAddress();
                if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                  emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                   OriginalAddr,
                                                   PrivateVD->getType());
                } else
                  EmitAutoVarInit(Emission);
                EmitAutoVarCleanups(Emission);
                return Addr;
              });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
            return GetAddrOfLocalVar(PrivateVD);
          });
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
      ++IRed;
    }
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      D.getDirectiveKind() == OMPD_simd;
    bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
    // Emit a nowait reduction if the nowait clause is present or the directive
    // is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (auto *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
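// Note: on the host, the reductions gathered above are ultimately lowered by
// CGOpenMPRuntime::emitReduction to the __kmpc_reduce / __kmpc_reduce_nowait
// runtime protocol (with an atomic fallback path); see CGOpenMPRuntime.cpp
// for the actual call sequence.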
namespace {
/// Codegen lambda for appending distribute lower and upper bounds to an
/// outlined parallel function. This is necessary for combined constructs such
/// as 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace

static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute'
  // chunk's lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
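// A simplified sketch of what emitCommonOMPParallelDirective above produces
// for a plain
//
//   #pragma omp parallel
//   { /* body */ }
//
// on the host (the if-clause and the serialized fallback are handled inside
// emitParallelCall):
//
//   call void @__kmpc_fork_call(%ident_t* @loc, i32 <n_captures>,
//                               void (i32*, i32*, ...)* @.omp_outlined.,
//                               <captured vars...>)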
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize the threads and avoid data
      // races when propagating the master thread's values of the
      // threadprivate variables to the local instances of those variables in
      // the other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values for the current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *U : C->updates())
      EmitIgnoredExpr(U);
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return;
  // Emit inits for the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
}

void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1493 Alignment = 1494 CGF.getContext() 1495 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( 1496 E->getType()->getPointeeType())) 1497 .getQuantity(); 1498 } 1499 assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) && 1500 "alignment is not power of 2"); 1501 if (Alignment != 0) { 1502 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 1503 CGF.EmitAlignmentAssumption(PtrValue, Alignment); 1504 } 1505 } 1506 } 1507 } 1508 1509 void CodeGenFunction::EmitOMPPrivateLoopCounters( 1510 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { 1511 if (!HaveInsertPoint()) 1512 return; 1513 auto I = S.private_counters().begin(); 1514 for (auto *E : S.counters()) { 1515 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1516 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 1517 (void)LoopScope.addPrivate(VD, [&]() -> Address { 1518 // Emit var without initialization. 1519 if (!LocalDeclMap.count(PrivateVD)) { 1520 auto VarEmission = EmitAutoVarAlloca(*PrivateVD); 1521 EmitAutoVarCleanups(VarEmission); 1522 } 1523 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), 1524 /*RefersToEnclosingVariableOrCapture=*/false, 1525 (*I)->getType(), VK_LValue, (*I)->getExprLoc()); 1526 return EmitLValue(&DRE).getAddress(); 1527 }); 1528 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || 1529 VD->hasGlobalStorage()) { 1530 (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address { 1531 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 1532 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), 1533 E->getType(), VK_LValue, E->getExprLoc()); 1534 return EmitLValue(&DRE).getAddress(); 1535 }); 1536 } 1537 ++I; 1538 } 1539 } 1540 1541 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 1542 const Expr *Cond, llvm::BasicBlock *TrueBlock, 1543 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 1544 if (!CGF.HaveInsertPoint()) 1545 return; 1546 { 1547 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 1548 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); 1549 (void)PreCondScope.Privatize(); 1550 // Get initial values of real counters. 1551 for (auto I : S.inits()) { 1552 CGF.EmitIgnoredExpr(I); 1553 } 1554 } 1555 // Check that loop is executed at least one time. 1556 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); 1557 } 1558 1559 void CodeGenFunction::EmitOMPLinearClause( 1560 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { 1561 if (!HaveInsertPoint()) 1562 return; 1563 llvm::DenseSet<const VarDecl *> SIMDLCVs; 1564 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 1565 auto *LoopDirective = cast<OMPLoopDirective>(&D); 1566 for (auto *C : LoopDirective->counters()) { 1567 SIMDLCVs.insert( 1568 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 1569 } 1570 } 1571 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1572 auto CurPrivate = C->privates().begin(); 1573 for (auto *E : C->varlists()) { 1574 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1575 auto *PrivateVD = 1576 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); 1577 if (!SIMDLCVs.count(VD->getCanonicalDecl())) { 1578 bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address { 1579 // Emit private VarDecl with copy init. 1580 EmitVarDecl(*PrivateVD); 1581 return GetAddrOfLocalVar(PrivateVD); 1582 }); 1583 assert(IsRegistered && "linear var already registered as private"); 1584 // Silence the warning about unused variable. 
1585 (void)IsRegistered;
1586 } else
1587 EmitVarDecl(*PrivateVD);
1588 ++CurPrivate;
1589 }
1590 }
1591 }
1592 
1593 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1594 const OMPExecutableDirective &D,
1595 bool IsMonotonic) {
1596 if (!CGF.HaveInsertPoint())
1597 return;
1598 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1599 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1600 /*ignoreResult=*/true);
1601 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1602 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1603 // In the presence of a finite 'safelen', it may be unsafe to mark all
1604 // the memory instructions parallel, because loop-carried
1605 // dependences of 'safelen' iterations are possible.
1606 if (!IsMonotonic)
1607 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1608 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1609 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1610 /*ignoreResult=*/true);
1611 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1612 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1613 // In the presence of a finite 'safelen', it may be unsafe to mark all
1614 // the memory instructions parallel, because loop-carried
1615 // dependences of 'safelen' iterations are possible.
1616 CGF.LoopStack.setParallel(false);
1617 }
1618 }
1619 
1620 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1621 bool IsMonotonic) {
1622 // Walk the clauses and process simdlen/safelen.
1623 LoopStack.setParallel(!IsMonotonic);
1624 LoopStack.setVectorizeEnable(true);
1625 emitSimdlenSafelenClause(*this, D, IsMonotonic);
1626 }
1627 
1628 void CodeGenFunction::EmitOMPSimdFinal(
1629 const OMPLoopDirective &D,
1630 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1631 if (!HaveInsertPoint())
1632 return;
1633 llvm::BasicBlock *DoneBB = nullptr;
1634 auto IC = D.counters().begin();
1635 auto IPC = D.private_counters().begin();
1636 for (auto F : D.finals()) {
1637 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1638 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1639 auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1640 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1641 OrigVD->hasGlobalStorage() || CED) {
1642 if (!DoneBB) {
1643 if (auto *Cond = CondGen(*this)) {
1644 // If the first post-update expression is found, emit the conditional
1645 // block if it was requested.
1646 auto *ThenBB = createBasicBlock(".omp.final.then"); 1647 DoneBB = createBasicBlock(".omp.final.done"); 1648 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1649 EmitBlock(ThenBB); 1650 } 1651 } 1652 Address OrigAddr = Address::invalid(); 1653 if (CED) 1654 OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); 1655 else { 1656 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), 1657 /*RefersToEnclosingVariableOrCapture=*/false, 1658 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); 1659 OrigAddr = EmitLValue(&DRE).getAddress(); 1660 } 1661 OMPPrivateScope VarScope(*this); 1662 VarScope.addPrivate(OrigVD, 1663 [OrigAddr]() -> Address { return OrigAddr; }); 1664 (void)VarScope.Privatize(); 1665 EmitIgnoredExpr(F); 1666 } 1667 ++IC; 1668 ++IPC; 1669 } 1670 if (DoneBB) 1671 EmitBlock(DoneBB, /*IsFinished=*/true); 1672 } 1673 1674 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, 1675 const OMPLoopDirective &S, 1676 CodeGenFunction::JumpDest LoopExit) { 1677 CGF.EmitOMPLoopBody(S, LoopExit); 1678 CGF.EmitStopPoint(&S); 1679 } 1680 1681 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 1682 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1683 OMPLoopScope PreInitScope(CGF, S); 1684 // if (PreCond) { 1685 // for (IV in 0..LastIteration) BODY; 1686 // <Final counter/linear vars updates>; 1687 // } 1688 // 1689 1690 // Emit: if (PreCond) - begin. 1691 // If the condition constant folds and can be elided, avoid emitting the 1692 // whole loop. 1693 bool CondConstant; 1694 llvm::BasicBlock *ContBlock = nullptr; 1695 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1696 if (!CondConstant) 1697 return; 1698 } else { 1699 auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); 1700 ContBlock = CGF.createBasicBlock("simd.if.end"); 1701 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 1702 CGF.getProfileCount(&S)); 1703 CGF.EmitBlock(ThenBlock); 1704 CGF.incrementProfileCounter(&S); 1705 } 1706 1707 // Emit the loop iteration variable. 1708 const Expr *IVExpr = S.getIterationVariable(); 1709 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 1710 CGF.EmitVarDecl(*IVDecl); 1711 CGF.EmitIgnoredExpr(S.getInit()); 1712 1713 // Emit the iterations count variable. 1714 // If it is not a variable, Sema decided to calculate iterations count on 1715 // each iteration (e.g., it is foldable into a constant). 1716 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1717 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1718 // Emit calculation of the iterations count. 1719 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 1720 } 1721 1722 CGF.EmitOMPSimdInit(S); 1723 1724 emitAlignedClause(CGF, S); 1725 CGF.EmitOMPLinearClauseInit(S); 1726 { 1727 OMPPrivateScope LoopScope(CGF); 1728 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 1729 CGF.EmitOMPLinearClause(S, LoopScope); 1730 CGF.EmitOMPPrivateClause(S, LoopScope); 1731 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1732 bool HasLastprivateClause = 1733 CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1734 (void)LoopScope.Privatize(); 1735 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1736 S.getInc(), 1737 [&S](CodeGenFunction &CGF) { 1738 CGF.EmitOMPLoopBody(S, JumpDest()); 1739 CGF.EmitStopPoint(&S); 1740 }, 1741 [](CodeGenFunction &) {}); 1742 CGF.EmitOMPSimdFinal( 1743 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1744 // Emit final copy of the lastprivate variables at the end of loops. 
1745 if (HasLastprivateClause) 1746 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 1747 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); 1748 emitPostUpdateForReductionClause( 1749 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1750 } 1751 CGF.EmitOMPLinearClauseFinal( 1752 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1753 // Emit: if (PreCond) - end. 1754 if (ContBlock) { 1755 CGF.EmitBranch(ContBlock); 1756 CGF.EmitBlock(ContBlock, true); 1757 } 1758 }; 1759 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1760 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1761 } 1762 1763 void CodeGenFunction::EmitOMPOuterLoop( 1764 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 1765 CodeGenFunction::OMPPrivateScope &LoopScope, 1766 const CodeGenFunction::OMPLoopArguments &LoopArgs, 1767 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 1768 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { 1769 auto &RT = CGM.getOpenMPRuntime(); 1770 1771 const Expr *IVExpr = S.getIterationVariable(); 1772 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1773 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1774 1775 auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 1776 1777 // Start the loop with a block that tests the condition. 1778 auto CondBlock = createBasicBlock("omp.dispatch.cond"); 1779 EmitBlock(CondBlock); 1780 const SourceRange &R = S.getSourceRange(); 1781 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1782 SourceLocToDebugLoc(R.getEnd())); 1783 1784 llvm::Value *BoolCondVal = nullptr; 1785 if (!DynamicOrOrdered) { 1786 // UB = min(UB, GlobalUB) or 1787 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 1788 // 'distribute parallel for') 1789 EmitIgnoredExpr(LoopArgs.EUB); 1790 // IV = LB 1791 EmitIgnoredExpr(LoopArgs.Init); 1792 // IV < UB 1793 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); 1794 } else { 1795 BoolCondVal = 1796 RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL, 1797 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); 1798 } 1799 1800 // If there are any cleanups between here and the loop-exit scope, 1801 // create a block to stage a loop exit along. 1802 auto ExitBlock = LoopExit.getBlock(); 1803 if (LoopScope.requiresCleanups()) 1804 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 1805 1806 auto LoopBody = createBasicBlock("omp.dispatch.body"); 1807 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 1808 if (ExitBlock != LoopExit.getBlock()) { 1809 EmitBlock(ExitBlock); 1810 EmitBranchThroughCleanup(LoopExit); 1811 } 1812 EmitBlock(LoopBody); 1813 1814 // Emit "IV = LB" (in case of static schedule, we have already calculated new 1815 // LB for loop condition and emitted it above). 1816 if (DynamicOrOrdered) 1817 EmitIgnoredExpr(LoopArgs.Init); 1818 1819 // Create a block for the increment. 1820 auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 1821 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1822 1823 // Generate !llvm.loop.parallel metadata for loads and stores for loops 1824 // with dynamic/guided scheduling and without ordered clause. 
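// As an illustrative sketch (assumed form, not verbatim compiler output):
// memory accesses in such a loop are tagged with parallel-access metadata
// that refers to the loop's own id, e.g.
//   %v = load i32, i32* %p, !llvm.mem.parallel_loop_access !7
//   br i1 %cmp, label %body, label %exit, !llvm.loop !7
// which tells later passes there are no loop-carried memory dependences.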
1825 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 1826 LoopStack.setParallel(!IsMonotonic); 1827 else 1828 EmitOMPSimdInit(S, IsMonotonic); 1829 1830 SourceLocation Loc = S.getLocStart(); 1831 1832 // when 'distribute' is not combined with a 'for': 1833 // while (idx <= UB) { BODY; ++idx; } 1834 // when 'distribute' is combined with a 'for' 1835 // (e.g. 'distribute parallel for') 1836 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 1837 EmitOMPInnerLoop( 1838 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, 1839 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 1840 CodeGenLoop(CGF, S, LoopExit); 1841 }, 1842 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { 1843 CodeGenOrdered(CGF, Loc, IVSize, IVSigned); 1844 }); 1845 1846 EmitBlock(Continue.getBlock()); 1847 BreakContinueStack.pop_back(); 1848 if (!DynamicOrOrdered) { 1849 // Emit "LB = LB + Stride", "UB = UB + Stride". 1850 EmitIgnoredExpr(LoopArgs.NextLB); 1851 EmitIgnoredExpr(LoopArgs.NextUB); 1852 } 1853 1854 EmitBranch(CondBlock); 1855 LoopStack.pop(); 1856 // Emit the fall-through block. 1857 EmitBlock(LoopExit.getBlock()); 1858 1859 // Tell the runtime we are done. 1860 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { 1861 if (!DynamicOrOrdered) 1862 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 1863 }; 1864 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 1865 } 1866 1867 void CodeGenFunction::EmitOMPForOuterLoop( 1868 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 1869 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1870 const OMPLoopArguments &LoopArgs, 1871 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 1872 auto &RT = CGM.getOpenMPRuntime(); 1873 1874 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 1875 const bool DynamicOrOrdered = 1876 Ordered || RT.isDynamic(ScheduleKind.Schedule); 1877 1878 assert((Ordered || 1879 !RT.isStaticNonchunked(ScheduleKind.Schedule, 1880 LoopArgs.Chunk != nullptr)) && 1881 "static non-chunked schedule does not need outer loop"); 1882 1883 // Emit outer loop. 1884 // 1885 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1886 // When schedule(dynamic,chunk_size) is specified, the iterations are 1887 // distributed to threads in the team in chunks as the threads request them. 1888 // Each thread executes a chunk of iterations, then requests another chunk, 1889 // until no chunks remain to be distributed. Each chunk contains chunk_size 1890 // iterations, except for the last chunk to be distributed, which may have 1891 // fewer iterations. When no chunk_size is specified, it defaults to 1. 1892 // 1893 // When schedule(guided,chunk_size) is specified, the iterations are assigned 1894 // to threads in the team in chunks as the executing threads request them. 1895 // Each thread executes a chunk of iterations, then requests another chunk, 1896 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 1897 // each chunk is proportional to the number of unassigned iterations divided 1898 // by the number of threads in the team, decreasing to 1. For a chunk_size 1899 // with value k (greater than 1), the size of each chunk is determined in the 1900 // same way, with the restriction that the chunks do not contain fewer than k 1901 // iterations (except for the last chunk to be assigned, which may have fewer 1902 // than k iterations). 
1903 //
1904 // When schedule(auto) is specified, the decision regarding scheduling is
1905 // delegated to the compiler and/or runtime system. The programmer gives the
1906 // implementation the freedom to choose any possible mapping of iterations to
1907 // threads in the team.
1908 //
1909 // When schedule(runtime) is specified, the decision regarding scheduling is
1910 // deferred until run time, and the schedule and chunk size are taken from the
1911 // run-sched-var ICV. If the ICV is set to auto, the schedule is
1912 // implementation-defined.
1913 //
1914 // while(__kmpc_dispatch_next(&LB, &UB)) {
1915 // idx = LB;
1916 // while (idx <= UB) { BODY; ++idx;
1917 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1918 // } // inner loop
1919 // }
1920 //
1921 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1922 // When schedule(static, chunk_size) is specified, iterations are divided into
1923 // chunks of size chunk_size, and the chunks are assigned to the threads in
1924 // the team in a round-robin fashion in the order of the thread number.
1925 //
1926 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1927 // while (idx <= UB) { BODY; ++idx; } // inner loop
1928 // LB = LB + ST;
1929 // UB = UB + ST;
1930 // }
1931 //
1932 
1933 const Expr *IVExpr = S.getIterationVariable();
1934 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1935 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1936 
1937 if (DynamicOrOrdered) {
1938 auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1939 llvm::Value *LBVal = DispatchBounds.first;
1940 llvm::Value *UBVal = DispatchBounds.second;
1941 CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
1942 LoopArgs.Chunk};
1943 RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1944 IVSigned, Ordered, DispatchRTInputValues);
1945 } else {
1946 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
1947 Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1948 LoopArgs.ST, LoopArgs.Chunk);
1949 }
1950 
1951 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1952 const unsigned IVSize,
1953 const bool IVSigned) {
1954 if (Ordered) {
1955 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1956 IVSigned);
1957 }
1958 };
1959 
1960 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1961 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1962 OuterLoopArgs.IncExpr = S.getInc();
1963 OuterLoopArgs.Init = S.getInit();
1964 OuterLoopArgs.Cond = S.getCond();
1965 OuterLoopArgs.NextLB = S.getNextLowerBound();
1966 OuterLoopArgs.NextUB = S.getNextUpperBound();
1967 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1968 emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1969 }
1970 
1971 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
1972 const unsigned IVSize, const bool IVSigned) {}
1973 
1974 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1975 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1976 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1977 const CodeGenLoopTy &CodeGenLoopContent) {
1978 
1979 auto &RT = CGM.getOpenMPRuntime();
1980 
1981 // Emit outer loop.
1982 // Same behavior as a OMPForOuterLoop, except that schedule cannot be 1983 // dynamic 1984 // 1985 1986 const Expr *IVExpr = S.getIterationVariable(); 1987 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1988 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1989 1990 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, 1991 IVSigned, /* Ordered = */ false, LoopArgs.IL, 1992 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, 1993 LoopArgs.Chunk); 1994 1995 // for combined 'distribute' and 'for' the increment expression of distribute 1996 // is store in DistInc. For 'distribute' alone, it is in Inc. 1997 Expr *IncExpr; 1998 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) 1999 IncExpr = S.getDistInc(); 2000 else 2001 IncExpr = S.getInc(); 2002 2003 // this routine is shared by 'omp distribute parallel for' and 2004 // 'omp distribute': select the right EUB expression depending on the 2005 // directive 2006 OMPLoopArguments OuterLoopArgs; 2007 OuterLoopArgs.LB = LoopArgs.LB; 2008 OuterLoopArgs.UB = LoopArgs.UB; 2009 OuterLoopArgs.ST = LoopArgs.ST; 2010 OuterLoopArgs.IL = LoopArgs.IL; 2011 OuterLoopArgs.Chunk = LoopArgs.Chunk; 2012 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2013 ? S.getCombinedEnsureUpperBound() 2014 : S.getEnsureUpperBound(); 2015 OuterLoopArgs.IncExpr = IncExpr; 2016 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2017 ? S.getCombinedInit() 2018 : S.getInit(); 2019 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2020 ? S.getCombinedCond() 2021 : S.getCond(); 2022 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2023 ? S.getCombinedNextLowerBound() 2024 : S.getNextLowerBound(); 2025 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2026 ? S.getCombinedNextUpperBound() 2027 : S.getNextUpperBound(); 2028 2029 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, 2030 LoopScope, OuterLoopArgs, CodeGenLoopContent, 2031 emitEmptyOrdered); 2032 } 2033 2034 /// Emit a helper variable and return corresponding lvalue. 2035 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 2036 const DeclRefExpr *Helper) { 2037 auto VDecl = cast<VarDecl>(Helper->getDecl()); 2038 CGF.EmitVarDecl(*VDecl); 2039 return CGF.EmitLValue(Helper); 2040 } 2041 2042 static std::pair<LValue, LValue> 2043 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, 2044 const OMPExecutableDirective &S) { 2045 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2046 LValue LB = 2047 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2048 LValue UB = 2049 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2050 2051 // When composing 'distribute' with 'for' (e.g. as in 'distribute 2052 // parallel for') we need to use the 'distribute' 2053 // chunk lower and upper bounds rather than the whole loop iteration 2054 // space. These are parameters to the outlined function for 'parallel' 2055 // and we copy the bounds of the previous schedule into the 2056 // the current ones. 
2057 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
2058 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
2059 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
2060 PrevLBVal = CGF.EmitScalarConversion(
2061 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
2062 LS.getIterationVariable()->getType(), SourceLocation());
2063 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
2064 PrevUBVal = CGF.EmitScalarConversion(
2065 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
2066 LS.getIterationVariable()->getType(), SourceLocation());
2067 
2068 CGF.EmitStoreOfScalar(PrevLBVal, LB);
2069 CGF.EmitStoreOfScalar(PrevUBVal, UB);
2070 
2071 return {LB, UB};
2072 }
2073 
2074 /// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided), then
2075 /// we need to use the LB and UB expressions generated by the worksharing
2076 /// code generation support, whereas in non-combined situations we would
2077 /// just emit 0 and the LastIteration expression.
2078 /// This function is necessary due to the difference in the LB and UB
2079 /// types for the RT emission routines for 'for_static_init' and
2080 /// 'for_dispatch_init'.
2081 static std::pair<llvm::Value *, llvm::Value *>
2082 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2083 const OMPExecutableDirective &S,
2084 Address LB, Address UB) {
2085 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2086 const Expr *IVExpr = LS.getIterationVariable();
2087 // When implementing a dynamic schedule for a 'for' combined with a
2088 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2089 // is not normalized, as each team only executes its own assigned
2090 // distribute chunk.
2091 QualType IteratorTy = IVExpr->getType();
2092 llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
2093 SourceLocation());
2094 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
2095 SourceLocation());
2096 return {LBVal, UBVal};
2097 }
2098 
2099 static void emitDistributeParallelForDistributeInnerBoundParams(
2100 CodeGenFunction &CGF, const OMPExecutableDirective &S,
2101 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
2102 const auto &Dir = cast<OMPLoopDirective>(S);
2103 LValue LB =
2104 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
2105 auto LBCast = CGF.Builder.CreateIntCast(
2106 CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2107 CapturedVars.push_back(LBCast);
2108 LValue UB =
2109 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2110 
2111 auto UBCast = CGF.Builder.CreateIntCast(
2112 CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2113 CapturedVars.push_back(UBCast);
2114 }
2115 
2116 static void
2117 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2118 const OMPLoopDirective &S,
2119 CodeGenFunction::JumpDest LoopExit) {
2120 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2121 PrePostActionTy &) {
2122 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2123 emitDistributeParallelForInnerBounds,
2124 emitDistributeParallelForDispatchBounds);
2125 };
2126 
2127 emitCommonOMPParallelDirective(
2128 CGF, S, OMPD_for, CGInlinedWorksharingLoop,
2129 emitDistributeParallelForDistributeInnerBoundParams);
2130 }
2131 
2132 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2133 const OMPDistributeParallelForDirective &S) {
2134 auto &&CodeGen = [&S](CodeGenFunction
&CGF, PrePostActionTy &) { 2135 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 2136 S.getDistInc()); 2137 }; 2138 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2139 OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for, 2140 /*HasCancel=*/false); 2141 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 2142 /*HasCancel=*/false); 2143 } 2144 2145 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( 2146 const OMPDistributeParallelForSimdDirective &S) { 2147 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2148 CGM.getOpenMPRuntime().emitInlinedDirective( 2149 *this, OMPD_distribute_parallel_for_simd, 2150 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2151 OMPLoopScope PreInitScope(CGF, S); 2152 CGF.EmitStmt( 2153 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2154 }); 2155 } 2156 2157 void CodeGenFunction::EmitOMPDistributeSimdDirective( 2158 const OMPDistributeSimdDirective &S) { 2159 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2160 CGM.getOpenMPRuntime().emitInlinedDirective( 2161 *this, OMPD_distribute_simd, 2162 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2163 OMPLoopScope PreInitScope(CGF, S); 2164 CGF.EmitStmt( 2165 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2166 }); 2167 } 2168 2169 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( 2170 const OMPTargetParallelForSimdDirective &S) { 2171 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2172 CGM.getOpenMPRuntime().emitInlinedDirective( 2173 *this, OMPD_target_parallel_for_simd, 2174 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2175 OMPLoopScope PreInitScope(CGF, S); 2176 CGF.EmitStmt( 2177 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2178 }); 2179 } 2180 2181 void CodeGenFunction::EmitOMPTargetSimdDirective( 2182 const OMPTargetSimdDirective &S) { 2183 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2184 CGM.getOpenMPRuntime().emitInlinedDirective( 2185 *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2186 OMPLoopScope PreInitScope(CGF, S); 2187 CGF.EmitStmt( 2188 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2189 }); 2190 } 2191 2192 void CodeGenFunction::EmitOMPTeamsDistributeDirective( 2193 const OMPTeamsDistributeDirective &S) { 2194 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2195 CGM.getOpenMPRuntime().emitInlinedDirective( 2196 *this, OMPD_teams_distribute, 2197 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2198 OMPLoopScope PreInitScope(CGF, S); 2199 CGF.EmitStmt( 2200 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2201 }); 2202 } 2203 2204 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( 2205 const OMPTeamsDistributeSimdDirective &S) { 2206 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2207 CGM.getOpenMPRuntime().emitInlinedDirective( 2208 *this, OMPD_teams_distribute_simd, 2209 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2210 OMPLoopScope PreInitScope(CGF, S); 2211 CGF.EmitStmt( 2212 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2213 }); 2214 } 2215 2216 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( 2217 const OMPTeamsDistributeParallelForSimdDirective &S) { 2218 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2219 CGM.getOpenMPRuntime().emitInlinedDirective( 2220 *this, OMPD_teams_distribute_parallel_for_simd, 2221 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2222 OMPLoopScope PreInitScope(CGF, S); 2223 CGF.EmitStmt( 2224 
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2225 }); 2226 } 2227 2228 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( 2229 const OMPTeamsDistributeParallelForDirective &S) { 2230 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2231 CGM.getOpenMPRuntime().emitInlinedDirective( 2232 *this, OMPD_teams_distribute_parallel_for, 2233 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2234 OMPLoopScope PreInitScope(CGF, S); 2235 CGF.EmitStmt( 2236 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2237 }); 2238 } 2239 2240 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( 2241 const OMPTargetTeamsDistributeDirective &S) { 2242 CGM.getOpenMPRuntime().emitInlinedDirective( 2243 *this, OMPD_target_teams_distribute, 2244 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2245 CGF.EmitStmt( 2246 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2247 }); 2248 } 2249 2250 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( 2251 const OMPTargetTeamsDistributeParallelForDirective &S) { 2252 CGM.getOpenMPRuntime().emitInlinedDirective( 2253 *this, OMPD_target_teams_distribute_parallel_for, 2254 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2255 CGF.EmitStmt( 2256 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2257 }); 2258 } 2259 2260 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( 2261 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 2262 CGM.getOpenMPRuntime().emitInlinedDirective( 2263 *this, OMPD_target_teams_distribute_parallel_for_simd, 2264 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2265 CGF.EmitStmt( 2266 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2267 }); 2268 } 2269 2270 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( 2271 const OMPTargetTeamsDistributeSimdDirective &S) { 2272 CGM.getOpenMPRuntime().emitInlinedDirective( 2273 *this, OMPD_target_teams_distribute_simd, 2274 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2275 CGF.EmitStmt( 2276 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2277 }); 2278 } 2279 2280 namespace { 2281 struct ScheduleKindModifiersTy { 2282 OpenMPScheduleClauseKind Kind; 2283 OpenMPScheduleClauseModifier M1; 2284 OpenMPScheduleClauseModifier M2; 2285 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, 2286 OpenMPScheduleClauseModifier M1, 2287 OpenMPScheduleClauseModifier M2) 2288 : Kind(Kind), M1(M1), M2(M2) {} 2289 }; 2290 } // namespace 2291 2292 bool CodeGenFunction::EmitOMPWorksharingLoop( 2293 const OMPLoopDirective &S, Expr *EUB, 2294 const CodeGenLoopBoundsTy &CodeGenLoopBounds, 2295 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 2296 // Emit the loop iteration variable. 2297 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2298 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2299 EmitVarDecl(*IVDecl); 2300 2301 // Emit the iterations count variable. 2302 // If it is not a variable, Sema decided to calculate iterations count on each 2303 // iteration (e.g., it is foldable into a constant). 2304 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2305 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2306 // Emit calculation of the iterations count. 2307 EmitIgnoredExpr(S.getCalcLastIteration()); 2308 } 2309 2310 auto &RT = CGM.getOpenMPRuntime(); 2311 2312 bool HasLastprivateClause; 2313 // Check pre-condition. 2314 { 2315 OMPLoopScope PreInitScope(*this, S); 2316 // Skip the entire loop if we don't meet the precondition. 
2317 // If the condition constant folds and can be elided, avoid emitting the 2318 // whole loop. 2319 bool CondConstant; 2320 llvm::BasicBlock *ContBlock = nullptr; 2321 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2322 if (!CondConstant) 2323 return false; 2324 } else { 2325 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2326 ContBlock = createBasicBlock("omp.precond.end"); 2327 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2328 getProfileCount(&S)); 2329 EmitBlock(ThenBlock); 2330 incrementProfileCounter(&S); 2331 } 2332 2333 bool Ordered = false; 2334 if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 2335 if (OrderedClause->getNumForLoops()) 2336 RT.emitDoacrossInit(*this, S); 2337 else 2338 Ordered = true; 2339 } 2340 2341 llvm::DenseSet<const Expr *> EmittedFinals; 2342 emitAlignedClause(*this, S); 2343 EmitOMPLinearClauseInit(S); 2344 // Emit helper vars inits. 2345 2346 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); 2347 LValue LB = Bounds.first; 2348 LValue UB = Bounds.second; 2349 LValue ST = 2350 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2351 LValue IL = 2352 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2353 2354 // Emit 'then' code. 2355 { 2356 OMPPrivateScope LoopScope(*this); 2357 if (EmitOMPFirstprivateClause(S, LoopScope)) { 2358 // Emit implicit barrier to synchronize threads and avoid data races on 2359 // initialization of firstprivate variables and post-update of 2360 // lastprivate variables. 2361 CGM.getOpenMPRuntime().emitBarrierCall( 2362 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2363 /*ForceSimpleCall=*/true); 2364 } 2365 EmitOMPPrivateClause(S, LoopScope); 2366 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2367 EmitOMPReductionClauseInit(S, LoopScope); 2368 EmitOMPPrivateLoopCounters(S, LoopScope); 2369 EmitOMPLinearClause(S, LoopScope); 2370 (void)LoopScope.Privatize(); 2371 2372 // Detect the loop schedule kind and chunk. 2373 llvm::Value *Chunk = nullptr; 2374 OpenMPScheduleTy ScheduleKind; 2375 if (auto *C = S.getSingleClause<OMPScheduleClause>()) { 2376 ScheduleKind.Schedule = C->getScheduleKind(); 2377 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2378 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2379 if (const auto *Ch = C->getChunkSize()) { 2380 Chunk = EmitScalarExpr(Ch); 2381 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2382 S.getIterationVariable()->getType(), 2383 S.getLocStart()); 2384 } 2385 } 2386 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2387 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2388 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2389 // If the static schedule kind is specified or if the ordered clause is 2390 // specified, and if no monotonic modifier is specified, the effect will 2391 // be as if the monotonic modifier was specified. 2392 if (RT.isStaticNonchunked(ScheduleKind.Schedule, 2393 /* Chunked */ Chunk != nullptr) && 2394 !Ordered) { 2395 if (isOpenMPSimdDirective(S.getDirectiveKind())) 2396 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2397 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2398 // When no chunk_size is specified, the iteration space is divided into 2399 // chunks that are approximately equal in size, and at most one chunk is 2400 // distributed to each thread. Note that the size of the chunks is 2401 // unspecified in this case. 
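// A concrete illustration (assumed numbers, not from the source): with 4
// threads and 128 iterations, schedule(static) with no chunk gives thread t
// roughly the range [32*t, 32*t+31], so a single static_init/static_fini
// pair brackets one pass over the assigned range with no further runtime
// calls inside the loop.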
2402 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, 2403 IVSize, IVSigned, Ordered, 2404 IL.getAddress(), LB.getAddress(), 2405 UB.getAddress(), ST.getAddress()); 2406 auto LoopExit = 2407 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2408 // UB = min(UB, GlobalUB); 2409 EmitIgnoredExpr(S.getEnsureUpperBound()); 2410 // IV = LB; 2411 EmitIgnoredExpr(S.getInit()); 2412 // while (idx <= UB) { BODY; ++idx; } 2413 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 2414 S.getInc(), 2415 [&S, LoopExit](CodeGenFunction &CGF) { 2416 CGF.EmitOMPLoopBody(S, LoopExit); 2417 CGF.EmitStopPoint(&S); 2418 }, 2419 [](CodeGenFunction &) {}); 2420 EmitBlock(LoopExit.getBlock()); 2421 // Tell the runtime we are done. 2422 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2423 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 2424 }; 2425 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2426 } else { 2427 const bool IsMonotonic = 2428 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || 2429 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || 2430 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 2431 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 2432 // Emit the outer loop, which requests its work chunk [LB..UB] from 2433 // runtime and runs the inner loop to process it. 2434 const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), 2435 ST.getAddress(), IL.getAddress(), 2436 Chunk, EUB); 2437 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 2438 LoopArguments, CGDispatchBounds); 2439 } 2440 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2441 EmitOMPSimdFinal(S, 2442 [&](CodeGenFunction &CGF) -> llvm::Value * { 2443 return CGF.Builder.CreateIsNotNull( 2444 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2445 }); 2446 } 2447 EmitOMPReductionClauseFinal( 2448 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 2449 ? /*Parallel and Simd*/ OMPD_parallel_for_simd 2450 : /*Parallel only*/ OMPD_parallel); 2451 // Emit post-update of the reduction variables if IsLastIter != 0. 2452 emitPostUpdateForReductionClause( 2453 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2454 return CGF.Builder.CreateIsNotNull( 2455 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2456 }); 2457 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2458 if (HasLastprivateClause) 2459 EmitOMPLastprivateClauseFinal( 2460 S, isOpenMPSimdDirective(S.getDirectiveKind()), 2461 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 2462 } 2463 EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2464 return CGF.Builder.CreateIsNotNull( 2465 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2466 }); 2467 // We're now done with the loop, so jump to the continuation block. 2468 if (ContBlock) { 2469 EmitBranch(ContBlock); 2470 EmitBlock(ContBlock, true); 2471 } 2472 } 2473 return HasLastprivateClause; 2474 } 2475 2476 /// The following two functions generate expressions for the loop lower 2477 /// and upper bounds in case of static and dynamic (dispatch) schedule 2478 /// of the associated 'for' or 'distribute' loop. 
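/// For example (illustrative only): on the static path the bounds are the
/// LB/UB helper variables materialized via EmitOMPHelperVar, while on the
/// dispatch path they degenerate to the constants 0 and LastIteration.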
2479 static std::pair<LValue, LValue> 2480 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 2481 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2482 LValue LB = 2483 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2484 LValue UB = 2485 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2486 return {LB, UB}; 2487 } 2488 2489 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not 2490 /// consider the lower and upper bound expressions generated by the 2491 /// worksharing loop support, but we use 0 and the iteration space size as 2492 /// constants 2493 static std::pair<llvm::Value *, llvm::Value *> 2494 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, 2495 Address LB, Address UB) { 2496 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2497 const Expr *IVExpr = LS.getIterationVariable(); 2498 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); 2499 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); 2500 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); 2501 return {LBVal, UBVal}; 2502 } 2503 2504 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 2505 bool HasLastprivates = false; 2506 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2507 PrePostActionTy &) { 2508 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 2509 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2510 emitForLoopBounds, 2511 emitDispatchForLoopBounds); 2512 }; 2513 { 2514 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2515 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 2516 S.hasCancel()); 2517 } 2518 2519 // Emit an implicit barrier at the end. 2520 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2521 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2522 } 2523 } 2524 2525 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 2526 bool HasLastprivates = false; 2527 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2528 PrePostActionTy &) { 2529 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2530 emitForLoopBounds, 2531 emitDispatchForLoopBounds); 2532 }; 2533 { 2534 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2535 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2536 } 2537 2538 // Emit an implicit barrier at the end. 2539 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2540 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2541 } 2542 } 2543 2544 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 2545 const Twine &Name, 2546 llvm::Value *Init = nullptr) { 2547 auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 2548 if (Init) 2549 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); 2550 return LVal; 2551 } 2552 2553 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 2554 auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); 2555 auto *CS = dyn_cast<CompoundStmt>(Stmt); 2556 bool HasLastprivates = false; 2557 auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF, 2558 PrePostActionTy &) { 2559 auto &C = CGF.CGM.getContext(); 2560 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2561 // Emit helper vars inits. 
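// For instance (illustrative values): a 'sections' region containing three
// '#pragma omp section' blocks initializes these helpers roughly as
//   lb = 0, ub = 3 - 1 = 2, st = 1, il = 0
// so the static worksharing machinery below hands out section indices
// exactly like iterations of a three-trip loop.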
2562 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 2563 CGF.Builder.getInt32(0)); 2564 auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1) 2565 : CGF.Builder.getInt32(0); 2566 LValue UB = 2567 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 2568 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 2569 CGF.Builder.getInt32(1)); 2570 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 2571 CGF.Builder.getInt32(0)); 2572 // Loop counter. 2573 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 2574 OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2575 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 2576 OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2577 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 2578 // Generate condition for loop. 2579 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 2580 OK_Ordinary, S.getLocStart(), FPOptions()); 2581 // Increment for loop counter. 2582 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 2583 S.getLocStart()); 2584 auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) { 2585 // Iterate through all sections and emit a switch construct: 2586 // switch (IV) { 2587 // case 0: 2588 // <SectionStmt[0]>; 2589 // break; 2590 // ... 2591 // case <NumSection> - 1: 2592 // <SectionStmt[<NumSection> - 1]>; 2593 // break; 2594 // } 2595 // .omp.sections.exit: 2596 auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 2597 auto *SwitchStmt = CGF.Builder.CreateSwitch( 2598 CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, 2599 CS == nullptr ? 1 : CS->size()); 2600 if (CS) { 2601 unsigned CaseNumber = 0; 2602 for (auto *SubStmt : CS->children()) { 2603 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2604 CGF.EmitBlock(CaseBB); 2605 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 2606 CGF.EmitStmt(SubStmt); 2607 CGF.EmitBranch(ExitBB); 2608 ++CaseNumber; 2609 } 2610 } else { 2611 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2612 CGF.EmitBlock(CaseBB); 2613 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 2614 CGF.EmitStmt(Stmt); 2615 CGF.EmitBranch(ExitBB); 2616 } 2617 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 2618 }; 2619 2620 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2621 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 2622 // Emit implicit barrier to synchronize threads and avoid data races on 2623 // initialization of firstprivate variables and post-update of lastprivate 2624 // variables. 2625 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 2626 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2627 /*ForceSimpleCall=*/true); 2628 } 2629 CGF.EmitOMPPrivateClause(S, LoopScope); 2630 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2631 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2632 (void)LoopScope.Privatize(); 2633 2634 // Emit static non-chunked loop. 
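// Roughly the following runtime bracketing results (a sketch; the exact
// arguments are produced by emitForStaticInit/emitForStaticFinish below):
//   __kmpc_for_static_init_4(&loc, tid, /*sched=*/static, &il, &lb, &ub,
//                            &st, /*incr=*/1, /*chunk=*/1);
//   while (iv <= ub) { switch (iv) { /* section cases */ } ++iv; }
//   __kmpc_for_static_fini(&loc, tid);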
2635 OpenMPScheduleTy ScheduleKind; 2636 ScheduleKind.Schedule = OMPC_SCHEDULE_static; 2637 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 2638 CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32, 2639 /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(), 2640 UB.getAddress(), ST.getAddress()); 2641 // UB = min(UB, GlobalUB); 2642 auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); 2643 auto *MinUBGlobalUB = CGF.Builder.CreateSelect( 2644 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 2645 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 2646 // IV = LB; 2647 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); 2648 // while (idx <= UB) { BODY; ++idx; } 2649 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, 2650 [](CodeGenFunction &) {}); 2651 // Tell the runtime we are done. 2652 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2653 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 2654 }; 2655 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); 2656 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 2657 // Emit post-update of the reduction variables if IsLastIter != 0. 2658 emitPostUpdateForReductionClause( 2659 CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2660 return CGF.Builder.CreateIsNotNull( 2661 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2662 }); 2663 2664 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2665 if (HasLastprivates) 2666 CGF.EmitOMPLastprivateClauseFinal( 2667 S, /*NoFinals=*/false, 2668 CGF.Builder.CreateIsNotNull( 2669 CGF.EmitLoadOfScalar(IL, S.getLocStart()))); 2670 }; 2671 2672 bool HasCancel = false; 2673 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 2674 HasCancel = OSD->hasCancel(); 2675 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 2676 HasCancel = OPSD->hasCancel(); 2677 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); 2678 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 2679 HasCancel); 2680 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 2681 // clause. Otherwise the barrier will be generated by the codegen for the 2682 // directive. 2683 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 2684 // Emit implicit barrier to synchronize threads and avoid data races on 2685 // initialization of firstprivate variables. 2686 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2687 OMPD_unknown); 2688 } 2689 } 2690 2691 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 2692 { 2693 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2694 EmitSections(S); 2695 } 2696 // Emit an implicit barrier at the end. 
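// For example, '#pragma omp sections nowait' carries an OMPNowaitClause, so
// the check below skips the barrier; without 'nowait', a __kmpc_barrier-style
// call is emitted so no thread leaves the region early.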
2697 if (!S.getSingleClause<OMPNowaitClause>()) { 2698 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2699 OMPD_sections); 2700 } 2701 } 2702 2703 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 2704 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2705 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2706 }; 2707 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2708 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 2709 S.hasCancel()); 2710 } 2711 2712 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 2713 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 2714 llvm::SmallVector<const Expr *, 8> DestExprs; 2715 llvm::SmallVector<const Expr *, 8> SrcExprs; 2716 llvm::SmallVector<const Expr *, 8> AssignmentOps; 2717 // Check if there are any 'copyprivate' clauses associated with this 2718 // 'single' construct. 2719 // Build a list of copyprivate variables along with helper expressions 2720 // (<source>, <destination>, <destination>=<source> expressions) 2721 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 2722 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 2723 DestExprs.append(C->destination_exprs().begin(), 2724 C->destination_exprs().end()); 2725 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 2726 AssignmentOps.append(C->assignment_ops().begin(), 2727 C->assignment_ops().end()); 2728 } 2729 // Emit code for 'single' region along with 'copyprivate' clauses 2730 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2731 Action.Enter(CGF); 2732 OMPPrivateScope SingleScope(CGF); 2733 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 2734 CGF.EmitOMPPrivateClause(S, SingleScope); 2735 (void)SingleScope.Privatize(); 2736 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2737 }; 2738 { 2739 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2740 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 2741 CopyprivateVars, DestExprs, 2742 SrcExprs, AssignmentOps); 2743 } 2744 // Emit an implicit barrier at the end (to avoid data race on firstprivate 2745 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 2746 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 2747 CGM.getOpenMPRuntime().emitBarrierCall( 2748 *this, S.getLocStart(), 2749 S.getSingleClause<OMPNowaitClause>() ? 
OMPD_unknown : OMPD_single); 2750 } 2751 } 2752 2753 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 2754 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2755 Action.Enter(CGF); 2756 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2757 }; 2758 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2759 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 2760 } 2761 2762 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 2763 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2764 Action.Enter(CGF); 2765 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2766 }; 2767 Expr *Hint = nullptr; 2768 if (auto *HintClause = S.getSingleClause<OMPHintClause>()) 2769 Hint = HintClause->getHint(); 2770 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2771 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 2772 S.getDirectiveName().getAsString(), 2773 CodeGen, S.getLocStart(), Hint); 2774 } 2775 2776 void CodeGenFunction::EmitOMPParallelForDirective( 2777 const OMPParallelForDirective &S) { 2778 // Emit directive as a combined directive that consists of two implicit 2779 // directives: 'parallel' with 'for' directive. 2780 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2781 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); 2782 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2783 emitDispatchForLoopBounds); 2784 }; 2785 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, 2786 emitEmptyBoundParameters); 2787 } 2788 2789 void CodeGenFunction::EmitOMPParallelForSimdDirective( 2790 const OMPParallelForSimdDirective &S) { 2791 // Emit directive as a combined directive that consists of two implicit 2792 // directives: 'parallel' with 'for' directive. 2793 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2794 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2795 emitDispatchForLoopBounds); 2796 }; 2797 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, 2798 emitEmptyBoundParameters); 2799 } 2800 2801 void CodeGenFunction::EmitOMPParallelSectionsDirective( 2802 const OMPParallelSectionsDirective &S) { 2803 // Emit directive as a combined directive that consists of two implicit 2804 // directives: 'parallel' with 'sections' directive. 2805 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2806 CGF.EmitSections(S); 2807 }; 2808 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, 2809 emitEmptyBoundParameters); 2810 } 2811 2812 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, 2813 const RegionCodeGenTy &BodyGen, 2814 const TaskGenTy &TaskGen, 2815 OMPTaskDataTy &Data) { 2816 // Emit outlined function for task construct. 2817 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2818 auto *I = CS->getCapturedDecl()->param_begin(); 2819 auto *PartId = std::next(I); 2820 auto *TaskT = std::next(I, 4); 2821 // Check if the task is final 2822 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 2823 // If the condition constant folds and can be elided, try to avoid emitting 2824 // the condition and the dead arm of the if/else. 
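// E.g. (illustration): 'final(1)' constant-folds to true, so only the integer
// flag of Data.Final is set; 'final(n > 10)' cannot be folded, so a runtime
// boolean is evaluated and stored in the pointer half of Data.Final below.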
2825 auto *Cond = Clause->getCondition(); 2826 bool CondConstant; 2827 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 2828 Data.Final.setInt(CondConstant); 2829 else 2830 Data.Final.setPointer(EvaluateExprAsBool(Cond)); 2831 } else { 2832 // By default the task is not final. 2833 Data.Final.setInt(/*IntVal=*/false); 2834 } 2835 // Check if the task has 'priority' clause. 2836 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 2837 auto *Prio = Clause->getPriority(); 2838 Data.Priority.setInt(/*IntVal=*/true); 2839 Data.Priority.setPointer(EmitScalarConversion( 2840 EmitScalarExpr(Prio), Prio->getType(), 2841 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 2842 Prio->getExprLoc())); 2843 } 2844 // The first function argument for tasks is a thread id, the second one is a 2845 // part id (0 for tied tasks, >=0 for untied task). 2846 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 2847 // Get list of private variables. 2848 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 2849 auto IRef = C->varlist_begin(); 2850 for (auto *IInit : C->private_copies()) { 2851 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2852 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2853 Data.PrivateVars.push_back(*IRef); 2854 Data.PrivateCopies.push_back(IInit); 2855 } 2856 ++IRef; 2857 } 2858 } 2859 EmittedAsPrivate.clear(); 2860 // Get list of firstprivate variables. 2861 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 2862 auto IRef = C->varlist_begin(); 2863 auto IElemInitRef = C->inits().begin(); 2864 for (auto *IInit : C->private_copies()) { 2865 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2866 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2867 Data.FirstprivateVars.push_back(*IRef); 2868 Data.FirstprivateCopies.push_back(IInit); 2869 Data.FirstprivateInits.push_back(*IElemInitRef); 2870 } 2871 ++IRef; 2872 ++IElemInitRef; 2873 } 2874 } 2875 // Get list of lastprivate variables (for taskloops). 2876 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 2877 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 2878 auto IRef = C->varlist_begin(); 2879 auto ID = C->destination_exprs().begin(); 2880 for (auto *IInit : C->private_copies()) { 2881 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2882 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2883 Data.LastprivateVars.push_back(*IRef); 2884 Data.LastprivateCopies.push_back(IInit); 2885 } 2886 LastprivateDstsOrigs.insert( 2887 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 2888 cast<DeclRefExpr>(*IRef)}); 2889 ++IRef; 2890 ++ID; 2891 } 2892 } 2893 // Build list of dependences. 2894 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) 2895 for (auto *IRef : C->varlists()) 2896 Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); 2897 auto &&CodeGen = [&Data, CS, &BodyGen, &LastprivateDstsOrigs]( 2898 CodeGenFunction &CGF, PrePostActionTy &Action) { 2899 // Set proper addresses for generated private copies. 2900 OMPPrivateScope Scope(CGF); 2901 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || 2902 !Data.LastprivateVars.empty()) { 2903 auto *CopyFn = CGF.Builder.CreateLoad( 2904 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); 2905 auto *PrivatesPtr = CGF.Builder.CreateLoad( 2906 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); 2907 // Map privates. 
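// A sketch of the call built below (names illustrative): the task's copy
// function receives the privates block plus one out-pointer per private,
// firstprivate, and lastprivate variable, e.g.
//   copy_fn(privates, &x.priv.ptr.addr, &y.firstpriv.ptr.addr, ...);
// after which each out-pointer holds the address of the task-local copy.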
  auto &&CodeGen = [&Data, CS, &BodyGen, &LastprivateDstsOrigs](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      auto *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (auto *E : Data.PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.LastprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.EmitRuntimeCall(CopyFn, CallArgs);
      for (auto &&Pair : LastprivateDstsOrigs) {
        auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    (void)Scope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}
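// Sketch of what the lambda above arranges (hypothetical user code):
//
//   #pragma omp task private(X) firstprivate(Y)
//   { Use(X, Y); }
//
// The outlined task function receives the privates block (parameter 2) and a
// copy function (parameter 3); '.priv.ptr.addr'-style temporaries are passed
// to the copy function, which fills them with the addresses of the task's
// private copies, and OMPPrivateScope then redirects 'X' and 'Y' to those
// addresses for the duration of the task body.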
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}
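// For reference (hypothetical user code): '#pragma omp flush(A, B)' passes
// the list 'A, B' to emitFlush, while a bare '#pragma omp flush' passes an
// empty list. The runtime lowering is expected to be a single memory-fence
// entry point either way; the list only constrains which variables must be
// made consistent, not how.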
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit an implicit barrier to synchronize threads and avoid data
        // races on initialization of firstprivate variables and post-update
        // of lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                                    IVSize, IVSigned, /* Ordered = */ false,
                                    IL.getAddress(), LB.getAddress(),
                                    UB.getAddress(), ST.getAddress());
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                         ? S.getCombinedCond()
                         : S.getCond();

        // For 'distribute' alone, codegen
        //   while (idx <= UB) { BODY; ++idx; }
        // and when combined with 'for' (e.g. as in 'distribute parallel for')
        //   while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
                         [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                           CodeGenLoop(CGF, S, LoopExit);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // the runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }

      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}

void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
                                              false);
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
  Fn->addFnAttr(llvm::Attribute::NoInline);
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (!S.getAssociatedStmt()) {
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    if (C) {
      auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
      CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
}
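// Summary of the two paths above (hypothetical user code):
//
//   #pragma omp ordered        // threads path: runtime enter/exit calls
//   { Body(); }                // around the emitted body
//
//   #pragma omp ordered simd   // simd path: the body is outlined into a
//   { Body(); }                // noinline function and called directly
//
// Keeping the 'simd' body in a separate noinline function presumably keeps
// its side effects opaque inside vectorized loops.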
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
                                        Loc)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                              DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  } else {
    CGF.EmitAtomicStore(RVal, LVal,
                        IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                 : llvm::AtomicOrdering::Monotonic,
                        LVal.isVolatile(), /*IsInit=*/false);
  }
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}
static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(
                         XLValue, Loc,
                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                  : llvm::AtomicOrdering::Monotonic,
                         XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
}

static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
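// Worked example for the two helpers above (hypothetical user code):
//
//   #pragma omp atomic read seq_cst
//   V = X;    // atomic load of 'X', implicit flush, then plain store to 'V'
//
//   #pragma omp atomic write
//   X = Expr; // evaluate 'Expr', then monotonic atomic store to 'X'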
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomics are supported for the given
  // type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
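// Mapping sketch for emitOMPAtomicRMW (hypothetical integer 'x'):
//   x = x + e;         -> atomicrmw add
//   x = x - e;         -> atomicrmw sub ('x = e - x' is rejected, since
//                         subtraction is not commutative)
//   x = x < e ? x : e; -> atomicrmw min/umin, depending on signedness
//   x = e;             -> atomicrmw xchg
// Everything else falls back to the compare-and-swap path in
// EmitOMPAtomicSimpleUpdateExpr below.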
std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}
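// Capture example for the helper below (hypothetical user code):
//
//   #pragma omp atomic capture   // postfix form: 'V' receives the old 'X'
//   { V = X; X += Expr; }
//
//   #pragma omp atomic capture   // update-then-capture: 'V' receives the
//   { X += Expr; V = X; }        // new value of 'X'
//
// When the update lowers to 'atomicrmw', the instruction yields the old
// value, so the new value is recomputed from it only in the non-postfix case.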
static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval binop expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr;
    // x = expr Op x; -> expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use the old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide the new value, so evaluate it using
        // the old value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find the first clause (skip the seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
    enterFullExpression(EWC);
  }
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const auto *C : Compound->body()) {
      if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
        enterFullExpression(EWC);
      }
    }
  }

  auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.EmitStopPoint(CS);
    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
                      S.getV(), S.getExpr(), S.getUpdateExpr(),
                      S.isXLHSInRHSPart(), S.getLocStart());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;
  const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());

  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for at most one 'if' clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
    Device = C->getDevice();
  }

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so, the target region is
  // not an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        CapturedVars);
}
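// Offload-entry example for the helper above (hypothetical user code):
//
//   #pragma omp target if(N > 1024) device(Dev)
//   { Work(); }
//
// 'N > 1024' becomes IfCond and 'Dev' becomes Device. If the 'if' condition
// constant-folds to false, or no offload target triples are configured, the
// region is still outlined but not registered as an offload entry point.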
static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();

  Action.Enter(CGF);
  CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
  const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
    Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getLocStart());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
                                           CapturedVars);
}
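// Clause example for the helper above (hypothetical user code):
//
//   #pragma omp teams num_teams(8) thread_limit(64)
//   { Work(); }
//
// passes the '8' and '64' expressions to emitNumTeamsClause before the teams
// call, so the runtime can size the league before the outlined function runs.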
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

static void emitTargetTeamsRegion(CodeGenFunction &CGF,
                                  PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for);
  return OMPCancelStack.getExitBlock();
}
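// Cancellation example (hypothetical user code):
//
//   #pragma omp parallel
//   {
//     #pragma omp cancel parallel if(Err)
//   }
//
// For 'parallel', 'task' and 'target parallel' regions the cancel branches to
// the function's return block; for the worksharing kinds it branches to the
// exit block recorded on OMPCancelStack (see OMPCancelStackRAII above).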
void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (auto PvtVarIt : C.private_copies()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get an
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // that the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it will be initialized by the
      // initialization variable we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable served its purpose in the emission of
      // the previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}
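// use_device_ptr example for the method above (hypothetical user code):
//
//   #pragma omp target data map(to: A[0:N]) use_device_ptr(A)
//   LaunchKernel(A);   // 'A' here holds the device address
//
// The private copy of 'A' is initialized from CaptureDeviceAddrMap, i.e. from
// the void* address the runtime reported for the mapped list item.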
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code
  // generation to deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers
      // if needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else
        RCG(CGF);
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inline scope, because changes to the references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
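// Control-flow sketch for the directive above: the privatization action only
// fires when the runtime really emits the mapping region, so
//   - no offload target triples: the plain body runs, nothing is privatized;
//   - triples present: emitTargetDataCalls wraps the body, its Enter callback
//     sets PrivatizeDevicePointers, and use_device_ptr variables are remapped
//     before the body is emitted.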
void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  auto *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(
      CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  // TODO: codegen for target parallel for.
}
/// Emit a helper variable and return corresponding lvalue.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(
      VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
}

void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}
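// Scheduling example for the directive above (hypothetical user code):
//
//   #pragma omp taskloop grainsize(G)  // Data.Schedule = {G, /*IntVal=*/false}
//   for (...) ...
//   #pragma omp taskloop num_tasks(T)  // Data.Schedule = {T, /*IntVal=*/true}
//   for (...) ...
//
// i.e. the pointer half of Data.Schedule carries the clause expression's
// value and the int half records which of the two clauses was given.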
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}