//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/DeclOpenMP.h"
#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs, which handles correct
/// codegen for captured expressions.
class OMPLexicalScope final : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                  bool AsInlined = false)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    emitPreInitStmt(CGF, S);
    if (AsInlined) {
      if (S.hasAssociatedStmt()) {
        auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
        for (auto &C : CS->captures()) {
          if (C.capturesVariable() || C.capturesVariableByCopy()) {
            auto *VD = C.getCapturedVar();
            DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                            isCapturedVar(CGF, VD) ||
                                (CGF.CapturedStmtInfo &&
                                 InlinedShareds.isGlobalVarCaptured(VD)),
                            VD->getType().getNonReferenceType(), VK_LValue,
                            SourceLocation());
            InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
              return CGF.EmitLValue(&DRE).getAddress();
            });
          }
        }
        (void)InlinedShareds.Privatize();
      }
    }
  }
};
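// As a rough illustration (not emitted verbatim): for a directive such as
//   #pragma omp parallel num_threads(foo())
// Sema may pre-capture the clause expression in a temporary declaration, and
// the pre-init statement handled above emits that temporary before the
// construct's body is entered.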
/// Private scope for OpenMP loop-based directives, which supports capturing
/// of used expressions from the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
        for (const auto *I : PreInits->decls())
          CGF.EmitVarDecl(cast<VarDecl>(*I));
      }
    }
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

} // namespace

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}
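// For example (illustrative): for 'double a[n][m]' the loop above peels both
// VLA dimensions, so the computed size is n * m * sizeof(double); for a
// complete type such as 'int[4]' the constant size is returned directly.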
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV =
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), SourceLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), SourceLocation());
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references we need to return the address of the
  // reference instead of the reference of the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
  }

  return TmpAddr;
}
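// A conceptual sketch of the by-copy round trip performed above: a non-pointer
// scalar captured by copy travels through a uintptr-sized slot, e.g.
//   uintptr_t slot;            // the ".casted" temporary
//   *(float *)&slot = value;   // store through the source type pointer
//   arg = slot;                // pass to the outlined function as uintptr
// and castValueFromUintptr() performs the inverse cast inside the outlined
// function.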
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  const RecordDecl *RD = S.getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  // Build the argument list.
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList Args;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted
    // to uintptr. This is necessary given that the runtime library only deals
    // with pointers. VLA type sizes are passed to the outlined function in the
    // same way.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType())
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &getContext().Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &getContext().Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType()) {
      bool IsReference = ArgType->isLValueReferenceType();
      ArgType =
          getContext().getCanonicalParamType(ArgType.getNonReferenceType());
      if (IsReference && !ArgType->isPointerType()) {
        ArgType = getContext().getLValueReferenceType(
            ArgType, /*SpelledAsLValue=*/false);
      }
    }
    Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
                                             FD->getLocation(), II, ArgType));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F = llvm::Function::Create(
      FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
      CapturedStmtInfo->getHelperName(), &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // Generate the function.
  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      Address LocalAddr = GetAddrOfLocalVar(Args[Cnt]);
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CreateMemTemp(CurVD->getType(), getPointerAlign(),
                                        ".materialized_ref");
        EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, /*Volatile=*/false,
                          CurVD->getType());
        LocalAddr = RefAddr;
      }
      setAddrOfLocalVar(CurVD, LocalAddr);
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal =
        MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
                       AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      LValue CastedArgLVal =
          MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
                                              Args[Cnt]->getName(), ArgLVal),
                         FD->getType(), AlignmentSource::Decl);
      auto *ExprArg =
          EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizeMap[VAT->getSizeExpr()] = ExprArg;
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = EmitLoadOfReference(
              ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
        } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      setAddrOfLocalVar(
          Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(),
                                                  Args[Cnt]->getName(), ArgLVal,
                                                  VarTy->isReferenceType()));
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue =
          EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
    }
    ++Cnt;
    ++I;
  }

  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());

  return F;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
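// The control flow emitted above is, schematically:
//
//   entry:               br (dest.begin == dest.end), done, body
//   omp.arraycpy.body:   element = phi [begin, entry], [next, body]
//                        <copy one element>; next = element + 1
//                        br (next == dest.end), done, body
//   omp.arraycpy.done: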
/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    auto *CE = cast<CallExpr>(InitOp);
    auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() -> Address { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() -> Address { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, ".init");
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
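// For example (source-level illustration): given
//   #pragma omp declare reduction(mymin : int : omp_out = omp_out < omp_in
//       ? omp_out : omp_in) initializer(omp_priv = INT_MAX)
// the 'initializer' clause is emitted through the UDR init function in the
// first branch above; without an initializer clause the private copy is
// initialized from a zero-filled ".init" global instead.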
/// \brief Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, const Expr *Init,
                                 Address SrcAddr = Address::invalid()) {
  auto *DRD = getReductionInit(Init);
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (DRD && (DRD->getInitializer() || !Init)) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
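// EmitOMPCopy (below) picks the cheapest legal copy: a plain aggregate
// assignment (effectively a memcpy) for arrays copied with a simple '=',
// an element-by-element loop for arrays whose elements need a real copy
// operator, and a single remapped copy expression for non-array variables.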
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *CapFD = CapturesInfo.lookup(OrigVD);
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}
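// Illustrative contrast between the two clauses handled above:
//   int a = 1;
//   #pragma omp parallel firstprivate(a) // private copy, copy-initialized
//   #pragma omp parallel private(a)      // private copy, default-initialized
// Note that a by-copy capture that is not also lastprivate can simply reuse
// the captured value as its firstprivate copy, so no extra alloca is needed.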
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code
        // with TLS support, the address is passed from the master as a field
        // in the captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If
          // it is, no data needs to be copied.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    // Taskloops do not require additional initialization; it is done in the
    // runtime support library.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the
      // end of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit
        // is not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [&]() -> Address {
                // Emit private VarDecl with copy init.
                EmitDecl(*VD);
                return GetAddrOfLocalVar(VD);
              });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable of a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLV.getPointer()->getType();
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLV.getAlignment());
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (auto *PtrTy = BaseTy->getAs<PointerType>())
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    else {
      BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
                                             BaseTy->castAs<ReferenceType>());
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      Address(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
          BaseLV.getAlignment()),
      BaseLV.getType(), BaseLV.getAlignmentSource());
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    for (auto IRef : C->varlists()) {
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
      auto *DRD = getReductionInit(*IRed);
      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
          Base = TempOASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
        auto OASELValueUB =
            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue =
            loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
                        OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [OASELValueLB]() -> Address {
          return OASELValueLB.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
                     OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit VarDecl with copy init for arrays.
              // Get the address of the original variable captured in current
              // captured region.
              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
                                                 OASELValueLB.getPointer());
              Size = Builder.CreateNUWAdd(
                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(Size));
              EmitVariablyModifiedType(PrivateVD->getType());
              auto Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              auto *Init = PrivateVD->getInit();
              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                   DRD ? *IRed : Init,
                                   OASELValueLB.getAddress());
              EmitAutoVarCleanups(Emission);
              // Emit private VarDecl with reduction init.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   OASELValueLB.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(),
                                OASELValueLB.getType(), OriginalBaseLValue,
                                Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto ASELValue = EmitLValue(ASE);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue = loadToBegin(
            *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(
            LHSVD, [ASELValue]() -> Address { return ASELValue.getAddress(); });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
                     OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit private VarDecl with reduction init.
              AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                 ASELValue.getAddress(),
                                                 ASELValue.getType());
              } else
                EmitAutoVarInit(Emission);
              EmitAutoVarCleanups(Emission);
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   ASELValue.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
                                OriginalBaseLValue, Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
          return Builder.CreateElementBitCast(
              GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
              "rhs.begin");
        });
      } else {
        auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        if (getContext().getAsArrayType(Type)) {
          // Store the address of the original variable associated with the
          // LHS implicit variable.
          DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          IRef->getType(), VK_LValue, IRef->getExprLoc());
          Address OriginalAddr = EmitLValue(&DRE).getAddress();
          PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
                                          LHSVD]() -> Address {
            OriginalAddr = Builder.CreateElementBitCast(
                OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
            return OriginalAddr;
          });
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            if (Type->isVariablyModifiedType()) {
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(
                      getTypeSize(OrigVD->getType().getNonReferenceType())));
              EmitVariablyModifiedType(Type);
            }
            auto Emission = EmitAutoVarAlloca(*PrivateVD);
            auto Addr = Emission.getAllocatedAddress();
            auto *Init = PrivateVD->getInit();
            EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                 DRD ? *IRed : Init, OriginalAddr);
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
            return Builder.CreateElementBitCast(
                GetAddrOfLocalVar(PrivateVD),
                ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
          });
        } else {
          // Store the address of the original variable associated with the
          // LHS implicit variable.
          Address OriginalAddr = Address::invalid();
          PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
                                          &OriginalAddr]() -> Address {
            DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                            CapturedStmtInfo->lookup(OrigVD) != nullptr,
                            IRef->getType(), VK_LValue, IRef->getExprLoc());
            OriginalAddr = EmitLValue(&DRE).getAddress();
            return OriginalAddr;
          });
          // Emit reduction copy.
          bool IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
                // Emit private VarDecl with reduction init.
                AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
                auto Addr = Emission.getAllocatedAddress();
                if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                  emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                   OriginalAddr,
                                                   PrivateVD->getType());
                } else
                  EmitAutoVarInit(Emission);
                EmitAutoVarCleanups(Emission);
                return Addr;
              });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
            return GetAddrOfLocalVar(PrivateVD);
          });
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
      ++IRed;
    }
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        D.getSingleClause<OMPNowaitClause>() ||
            isOpenMPParallelDirective(D.getDirectiveKind()) ||
            D.getDirectiveKind() == OMPD_simd,
        D.getDirectiveKind() == OMPD_simd);
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (auto *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
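// The helpers below lower '#pragma omp parallel': the associated statement is
// outlined into a separate function and, conceptually, invoked through the
// OpenMP runtime as
//   __kmpc_fork_call(&loc, <n>, <outlined_fn>, <captured vars>...);
// with the num_threads/proc_bind/if clauses emitted as separate runtime calls
// beforehand (this sketch elides the exact runtime signatures).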
static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                           const OMPExecutableDirective &S,
                                           OpenMPDirectiveKind InnermostKind,
                                           const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOrTeamsOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPLexicalScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // when propagating the master thread's values of threadprivate
      // variables to the local instances of those variables in all other
      // implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
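// The worksharing/simd loop bodies emitted by the helpers below share one
// skeleton; schematically, EmitOMPInnerLoop produces:
//   omp.inner.for.cond: br (LoopCond), body, end
//   omp.inner.for.body: <BODY>; br inc   (continue jumps here too)
//   omp.inner.for.inc:  IV = IV + 1; br cond
//   omp.inner.for.end: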
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *U : C->updates())
      EmitIgnoredExpr(U);
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return;
  // Emit inits for the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
}
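// As an illustration of the linear clause handled above and below: for
//   #pragma omp simd linear(l : 2)
// the private copy starts from the original value, each iteration executes an
// update of the form 'l.priv = l.start + iv * 2', and after the loop the
// final value is written back to 'l' (plus an optional post-update).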
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    (void)LoopScope.addPrivate(VD, [&]() -> Address {
      // Emit var without initialization.
1458 if (!LocalDeclMap.count(PrivateVD)) { 1459 auto VarEmission = EmitAutoVarAlloca(*PrivateVD); 1460 EmitAutoVarCleanups(VarEmission); 1461 } 1462 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), 1463 /*RefersToEnclosingVariableOrCapture=*/false, 1464 (*I)->getType(), VK_LValue, (*I)->getExprLoc()); 1465 return EmitLValue(&DRE).getAddress(); 1466 }); 1467 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || 1468 VD->hasGlobalStorage()) { 1469 (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address { 1470 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 1471 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), 1472 E->getType(), VK_LValue, E->getExprLoc()); 1473 return EmitLValue(&DRE).getAddress(); 1474 }); 1475 } 1476 ++I; 1477 } 1478 } 1479 1480 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 1481 const Expr *Cond, llvm::BasicBlock *TrueBlock, 1482 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 1483 if (!CGF.HaveInsertPoint()) 1484 return; 1485 { 1486 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 1487 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); 1488 (void)PreCondScope.Privatize(); 1489 // Get initial values of real counters. 1490 for (auto I : S.inits()) { 1491 CGF.EmitIgnoredExpr(I); 1492 } 1493 } 1494 // Check that loop is executed at least one time. 1495 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); 1496 } 1497 1498 void CodeGenFunction::EmitOMPLinearClause( 1499 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { 1500 if (!HaveInsertPoint()) 1501 return; 1502 llvm::DenseSet<const VarDecl *> SIMDLCVs; 1503 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 1504 auto *LoopDirective = cast<OMPLoopDirective>(&D); 1505 for (auto *C : LoopDirective->counters()) { 1506 SIMDLCVs.insert( 1507 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 1508 } 1509 } 1510 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1511 auto CurPrivate = C->privates().begin(); 1512 for (auto *E : C->varlists()) { 1513 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1514 auto *PrivateVD = 1515 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); 1516 if (!SIMDLCVs.count(VD->getCanonicalDecl())) { 1517 bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address { 1518 // Emit private VarDecl with copy init. 1519 EmitVarDecl(*PrivateVD); 1520 return GetAddrOfLocalVar(PrivateVD); 1521 }); 1522 assert(IsRegistered && "linear var already registered as private"); 1523 // Silence the warning about unused variable. 1524 (void)IsRegistered; 1525 } else 1526 EmitVarDecl(*PrivateVD); 1527 ++CurPrivate; 1528 } 1529 } 1530 } 1531 1532 static void emitSimdlenSafelenClause(CodeGenFunction &CGF, 1533 const OMPExecutableDirective &D, 1534 bool IsMonotonic) { 1535 if (!CGF.HaveInsertPoint()) 1536 return; 1537 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { 1538 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 1539 /*ignoreResult=*/true); 1540 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1541 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1542 // In presence of finite 'safelen', it may be unsafe to mark all 1543 // the memory instructions parallel, because loop-carried 1544 // dependences of 'safelen' iterations are possible. 
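// With 'simdlen' alone there is no such constraint, so the loop stays
// marked parallel unless a 'safelen' clause is also present or the
// schedule is monotonic.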
1545     if (!IsMonotonic)
1546       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1547   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1548     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1549                                  /*ignoreResult=*/true);
1550     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1551     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1552     // In presence of finite 'safelen', it may be unsafe to mark all
1553     // the memory instructions parallel, because loop-carried
1554     // dependences of 'safelen' iterations are possible.
1555     CGF.LoopStack.setParallel(false);
1556   }
1557 }
1558
1559 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1560                                       bool IsMonotonic) {
1561   // Walk clauses and process safelen/simdlen.
1562   LoopStack.setParallel(!IsMonotonic);
1563   LoopStack.setVectorizeEnable(true);
1564   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1565 }
1566
1567 void CodeGenFunction::EmitOMPSimdFinal(
1568     const OMPLoopDirective &D,
1569     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1570   if (!HaveInsertPoint())
1571     return;
1572   llvm::BasicBlock *DoneBB = nullptr;
1573   auto IC = D.counters().begin();
1574   auto IPC = D.private_counters().begin();
1575   for (auto F : D.finals()) {
1576     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1577     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1578     auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1579     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1580         OrigVD->hasGlobalStorage() || CED) {
1581       if (!DoneBB) {
1582         if (auto *Cond = CondGen(*this)) {
1583           // If the first post-update expression is found, emit the
1584           // conditional block if it was requested.
1585           auto *ThenBB = createBasicBlock(".omp.final.then");
1586           DoneBB = createBasicBlock(".omp.final.done");
1587           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1588           EmitBlock(ThenBB);
1589         }
1590       }
1591       Address OrigAddr = Address::invalid();
1592       if (CED)
1593         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1594       else {
1595         DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1596                         /*RefersToEnclosingVariableOrCapture=*/false,
1597                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1598         OrigAddr = EmitLValue(&DRE).getAddress();
1599       }
1600       OMPPrivateScope VarScope(*this);
1601       VarScope.addPrivate(OrigVD,
1602                           [OrigAddr]() -> Address { return OrigAddr; });
1603       (void)VarScope.Privatize();
1604       EmitIgnoredExpr(F);
1605     }
1606     ++IC;
1607     ++IPC;
1608   }
1609   if (DoneBB)
1610     EmitBlock(DoneBB, /*IsFinished=*/true);
1611 }
1612
1613 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1614   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1615     OMPLoopScope PreInitScope(CGF, S);
1616     // if (PreCond) {
1617     //   for (IV in 0..LastIteration) BODY;
1618     //   <Final counter/linear vars updates>;
1619     // }
1620     //
1621
1622     // Emit: if (PreCond) - begin.
1623     // If the condition constant folds and can be elided, avoid emitting the
1624     // whole loop.
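// E.g. a precondition that folds to 'false' (the loop provably executes
// zero iterations) causes the whole simd region to be skipped.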
1625 bool CondConstant; 1626 llvm::BasicBlock *ContBlock = nullptr; 1627 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1628 if (!CondConstant) 1629 return; 1630 } else { 1631 auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); 1632 ContBlock = CGF.createBasicBlock("simd.if.end"); 1633 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 1634 CGF.getProfileCount(&S)); 1635 CGF.EmitBlock(ThenBlock); 1636 CGF.incrementProfileCounter(&S); 1637 } 1638 1639 // Emit the loop iteration variable. 1640 const Expr *IVExpr = S.getIterationVariable(); 1641 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 1642 CGF.EmitVarDecl(*IVDecl); 1643 CGF.EmitIgnoredExpr(S.getInit()); 1644 1645 // Emit the iterations count variable. 1646 // If it is not a variable, Sema decided to calculate iterations count on 1647 // each iteration (e.g., it is foldable into a constant). 1648 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1649 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1650 // Emit calculation of the iterations count. 1651 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 1652 } 1653 1654 CGF.EmitOMPSimdInit(S); 1655 1656 emitAlignedClause(CGF, S); 1657 CGF.EmitOMPLinearClauseInit(S); 1658 { 1659 OMPPrivateScope LoopScope(CGF); 1660 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 1661 CGF.EmitOMPLinearClause(S, LoopScope); 1662 CGF.EmitOMPPrivateClause(S, LoopScope); 1663 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1664 bool HasLastprivateClause = 1665 CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1666 (void)LoopScope.Privatize(); 1667 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1668 S.getInc(), 1669 [&S](CodeGenFunction &CGF) { 1670 CGF.EmitOMPLoopBody(S, JumpDest()); 1671 CGF.EmitStopPoint(&S); 1672 }, 1673 [](CodeGenFunction &) {}); 1674 CGF.EmitOMPSimdFinal( 1675 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1676 // Emit final copy of the lastprivate variables at the end of loops. 1677 if (HasLastprivateClause) 1678 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 1679 CGF.EmitOMPReductionClauseFinal(S); 1680 emitPostUpdateForReductionClause( 1681 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1682 } 1683 CGF.EmitOMPLinearClauseFinal( 1684 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1685 // Emit: if (PreCond) - end. 1686 if (ContBlock) { 1687 CGF.EmitBranch(ContBlock); 1688 CGF.EmitBlock(ContBlock, true); 1689 } 1690 }; 1691 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1692 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1693 } 1694 1695 void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, 1696 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1697 Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { 1698 auto &RT = CGM.getOpenMPRuntime(); 1699 1700 const Expr *IVExpr = S.getIterationVariable(); 1701 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1702 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1703 1704 auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 1705 1706 // Start the loop with a block that tests the condition. 
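// The emitted control flow is roughly:
//   omp.dispatch.cond -> omp.dispatch.body -> <inner loop>
//     -> omp.dispatch.inc -> omp.dispatch.cond (back edge)
//   omp.dispatch.cond -> omp.dispatch.end (no more chunks)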
1707 auto CondBlock = createBasicBlock("omp.dispatch.cond"); 1708 EmitBlock(CondBlock); 1709 const SourceRange &R = S.getSourceRange(); 1710 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1711 SourceLocToDebugLoc(R.getEnd())); 1712 1713 llvm::Value *BoolCondVal = nullptr; 1714 if (!DynamicOrOrdered) { 1715 // UB = min(UB, GlobalUB) 1716 EmitIgnoredExpr(S.getEnsureUpperBound()); 1717 // IV = LB 1718 EmitIgnoredExpr(S.getInit()); 1719 // IV < UB 1720 BoolCondVal = EvaluateExprAsBool(S.getCond()); 1721 } else { 1722 BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL, 1723 LB, UB, ST); 1724 } 1725 1726 // If there are any cleanups between here and the loop-exit scope, 1727 // create a block to stage a loop exit along. 1728 auto ExitBlock = LoopExit.getBlock(); 1729 if (LoopScope.requiresCleanups()) 1730 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 1731 1732 auto LoopBody = createBasicBlock("omp.dispatch.body"); 1733 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 1734 if (ExitBlock != LoopExit.getBlock()) { 1735 EmitBlock(ExitBlock); 1736 EmitBranchThroughCleanup(LoopExit); 1737 } 1738 EmitBlock(LoopBody); 1739 1740 // Emit "IV = LB" (in case of static schedule, we have already calculated new 1741 // LB for loop condition and emitted it above). 1742 if (DynamicOrOrdered) 1743 EmitIgnoredExpr(S.getInit()); 1744 1745 // Create a block for the increment. 1746 auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 1747 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1748 1749 // Generate !llvm.loop.parallel metadata for loads and stores for loops 1750 // with dynamic/guided scheduling and without ordered clause. 1751 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 1752 LoopStack.setParallel(!IsMonotonic); 1753 else 1754 EmitOMPSimdInit(S, IsMonotonic); 1755 1756 SourceLocation Loc = S.getLocStart(); 1757 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), 1758 [&S, LoopExit](CodeGenFunction &CGF) { 1759 CGF.EmitOMPLoopBody(S, LoopExit); 1760 CGF.EmitStopPoint(&S); 1761 }, 1762 [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) { 1763 if (Ordered) { 1764 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd( 1765 CGF, Loc, IVSize, IVSigned); 1766 } 1767 }); 1768 1769 EmitBlock(Continue.getBlock()); 1770 BreakContinueStack.pop_back(); 1771 if (!DynamicOrOrdered) { 1772 // Emit "LB = LB + Stride", "UB = UB + Stride". 1773 EmitIgnoredExpr(S.getNextLowerBound()); 1774 EmitIgnoredExpr(S.getNextUpperBound()); 1775 } 1776 1777 EmitBranch(CondBlock); 1778 LoopStack.pop(); 1779 // Emit the fall-through block. 1780 EmitBlock(LoopExit.getBlock()); 1781 1782 // Tell the runtime we are done. 1783 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { 1784 if (!DynamicOrOrdered) 1785 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 1786 }; 1787 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 1788 } 1789 1790 void CodeGenFunction::EmitOMPForOuterLoop( 1791 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 1792 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1793 Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { 1794 auto &RT = CGM.getOpenMPRuntime(); 1795 1796 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 
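// Ordered loops are lowered through the dynamic dispatch path as well,
// because every chunk must be finished via __kmpc_dispatch_fini_* before
// the next one is requested (see the pseudo-code below).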
1797 const bool DynamicOrOrdered = 1798 Ordered || RT.isDynamic(ScheduleKind.Schedule); 1799 1800 assert((Ordered || 1801 !RT.isStaticNonchunked(ScheduleKind.Schedule, 1802 /*Chunked=*/Chunk != nullptr)) && 1803 "static non-chunked schedule does not need outer loop"); 1804 1805 // Emit outer loop. 1806 // 1807 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1808 // When schedule(dynamic,chunk_size) is specified, the iterations are 1809 // distributed to threads in the team in chunks as the threads request them. 1810 // Each thread executes a chunk of iterations, then requests another chunk, 1811 // until no chunks remain to be distributed. Each chunk contains chunk_size 1812 // iterations, except for the last chunk to be distributed, which may have 1813 // fewer iterations. When no chunk_size is specified, it defaults to 1. 1814 // 1815 // When schedule(guided,chunk_size) is specified, the iterations are assigned 1816 // to threads in the team in chunks as the executing threads request them. 1817 // Each thread executes a chunk of iterations, then requests another chunk, 1818 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 1819 // each chunk is proportional to the number of unassigned iterations divided 1820 // by the number of threads in the team, decreasing to 1. For a chunk_size 1821 // with value k (greater than 1), the size of each chunk is determined in the 1822 // same way, with the restriction that the chunks do not contain fewer than k 1823 // iterations (except for the last chunk to be assigned, which may have fewer 1824 // than k iterations). 1825 // 1826 // When schedule(auto) is specified, the decision regarding scheduling is 1827 // delegated to the compiler and/or runtime system. The programmer gives the 1828 // implementation the freedom to choose any possible mapping of iterations to 1829 // threads in the team. 1830 // 1831 // When schedule(runtime) is specified, the decision regarding scheduling is 1832 // deferred until run time, and the schedule and chunk size are taken from the 1833 // run-sched-var ICV. If the ICV is set to auto, the schedule is 1834 // implementation defined 1835 // 1836 // while(__kmpc_dispatch_next(&LB, &UB)) { 1837 // idx = LB; 1838 // while (idx <= UB) { BODY; ++idx; 1839 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. 1840 // } // inner loop 1841 // } 1842 // 1843 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1844 // When schedule(static, chunk_size) is specified, iterations are divided into 1845 // chunks of size chunk_size, and the chunks are assigned to the threads in 1846 // the team in a round-robin fashion in the order of the thread number. 
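// For example, schedule(static, 2) with 2 threads and 8 iterations gives
// thread 0 the chunks {0,1} and {4,5}, and thread 1 the chunks {2,3} and
// {6,7}.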
1847   //
1848   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1849   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1850   //   LB = LB + ST;
1851   //   UB = UB + ST;
1852   // }
1853   //
1854
1855   const Expr *IVExpr = S.getIterationVariable();
1856   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1857   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1858
1859   if (DynamicOrOrdered) {
1860     llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
1861     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1862                            IVSigned, Ordered, UBVal, Chunk);
1863   } else {
1864     RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
1865                          Ordered, IL, LB, UB, ST, Chunk);
1866   }
1867
1868   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB,
1869                    ST, IL, Chunk);
1870 }
1871
1872 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1873     OpenMPDistScheduleClauseKind ScheduleKind,
1874     const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
1875     Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
1876
1877   auto &RT = CGM.getOpenMPRuntime();
1878
1879   // Emit outer loop.
1880   // Same behavior as EmitOMPForOuterLoop, except that the schedule cannot
1881   // be dynamic.
1882   //
1883
1884   const Expr *IVExpr = S.getIterationVariable();
1885   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1886   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1887
1888   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
1889                               IVSize, IVSigned, /* Ordered = */ false,
1890                               IL, LB, UB, ST, Chunk);
1891
1892   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false,
1893                    S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk);
1894 }
1895
1896 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
1897     const OMPDistributeParallelForDirective &S) {
1898   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1899   CGM.getOpenMPRuntime().emitInlinedDirective(
1900       *this, OMPD_distribute_parallel_for,
1901       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1902         OMPLoopScope PreInitScope(CGF, S);
1903         OMPCancelStackRAII CancelRegion(CGF, OMPD_distribute_parallel_for,
1904                                         /*HasCancel=*/false);
1905         CGF.EmitStmt(
1906             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1907       });
1908 }
1909
1910 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
1911     const OMPDistributeParallelForSimdDirective &S) {
1912   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1913   CGM.getOpenMPRuntime().emitInlinedDirective(
1914       *this, OMPD_distribute_parallel_for_simd,
1915       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1916         OMPLoopScope PreInitScope(CGF, S);
1917         CGF.EmitStmt(
1918             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1919       });
1920 }
1921
1922 void CodeGenFunction::EmitOMPDistributeSimdDirective(
1923     const OMPDistributeSimdDirective &S) {
1924   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1925   CGM.getOpenMPRuntime().emitInlinedDirective(
1926       *this, OMPD_distribute_simd,
1927       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1928         OMPLoopScope PreInitScope(CGF, S);
1929         CGF.EmitStmt(
1930             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1931       });
1932 }
1933
1934 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
1935     const OMPTargetParallelForSimdDirective &S) {
1936   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1937   CGM.getOpenMPRuntime().emitInlinedDirective(
1938       *this,
OMPD_target_parallel_for_simd, 1939 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1940 OMPLoopScope PreInitScope(CGF, S); 1941 CGF.EmitStmt( 1942 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1943 }); 1944 } 1945 1946 void CodeGenFunction::EmitOMPTargetSimdDirective( 1947 const OMPTargetSimdDirective &S) { 1948 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1949 CGM.getOpenMPRuntime().emitInlinedDirective( 1950 *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1951 OMPLoopScope PreInitScope(CGF, S); 1952 CGF.EmitStmt( 1953 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1954 }); 1955 } 1956 1957 void CodeGenFunction::EmitOMPTeamsDistributeDirective( 1958 const OMPTeamsDistributeDirective &S) { 1959 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1960 CGM.getOpenMPRuntime().emitInlinedDirective( 1961 *this, OMPD_teams_distribute, 1962 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1963 OMPLoopScope PreInitScope(CGF, S); 1964 CGF.EmitStmt( 1965 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1966 }); 1967 } 1968 1969 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( 1970 const OMPTeamsDistributeSimdDirective &S) { 1971 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1972 CGM.getOpenMPRuntime().emitInlinedDirective( 1973 *this, OMPD_teams_distribute_simd, 1974 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1975 OMPLoopScope PreInitScope(CGF, S); 1976 CGF.EmitStmt( 1977 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1978 }); 1979 } 1980 1981 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( 1982 const OMPTeamsDistributeParallelForSimdDirective &S) { 1983 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1984 CGM.getOpenMPRuntime().emitInlinedDirective( 1985 *this, OMPD_teams_distribute_parallel_for_simd, 1986 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1987 OMPLoopScope PreInitScope(CGF, S); 1988 CGF.EmitStmt( 1989 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1990 }); 1991 } 1992 1993 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( 1994 const OMPTeamsDistributeParallelForDirective &S) { 1995 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1996 CGM.getOpenMPRuntime().emitInlinedDirective( 1997 *this, OMPD_teams_distribute_parallel_for, 1998 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1999 OMPLoopScope PreInitScope(CGF, S); 2000 CGF.EmitStmt( 2001 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2002 }); 2003 } 2004 2005 void CodeGenFunction::EmitOMPTargetTeamsDirective( 2006 const OMPTargetTeamsDirective &S) { 2007 CGM.getOpenMPRuntime().emitInlinedDirective( 2008 *this, OMPD_target_teams, [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2009 CGF.EmitStmt( 2010 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2011 }); 2012 } 2013 2014 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( 2015 const OMPTargetTeamsDistributeDirective &S) { 2016 CGM.getOpenMPRuntime().emitInlinedDirective( 2017 *this, OMPD_target_teams_distribute, 2018 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2019 CGF.EmitStmt( 2020 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2021 }); 2022 } 2023 2024 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( 2025 const OMPTargetTeamsDistributeParallelForDirective &S) { 2026 CGM.getOpenMPRuntime().emitInlinedDirective( 2027 *this, OMPD_target_teams_distribute_parallel_for, 2028 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 
2029 CGF.EmitStmt( 2030 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2031 }); 2032 } 2033 2034 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( 2035 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 2036 CGM.getOpenMPRuntime().emitInlinedDirective( 2037 *this, OMPD_target_teams_distribute_parallel_for_simd, 2038 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2039 CGF.EmitStmt( 2040 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2041 }); 2042 } 2043 2044 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( 2045 const OMPTargetTeamsDistributeSimdDirective &S) { 2046 CGM.getOpenMPRuntime().emitInlinedDirective( 2047 *this, OMPD_target_teams_distribute_simd, 2048 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2049 CGF.EmitStmt( 2050 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2051 }); 2052 } 2053 2054 /// \brief Emit a helper variable and return corresponding lvalue. 2055 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 2056 const DeclRefExpr *Helper) { 2057 auto VDecl = cast<VarDecl>(Helper->getDecl()); 2058 CGF.EmitVarDecl(*VDecl); 2059 return CGF.EmitLValue(Helper); 2060 } 2061 2062 namespace { 2063 struct ScheduleKindModifiersTy { 2064 OpenMPScheduleClauseKind Kind; 2065 OpenMPScheduleClauseModifier M1; 2066 OpenMPScheduleClauseModifier M2; 2067 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, 2068 OpenMPScheduleClauseModifier M1, 2069 OpenMPScheduleClauseModifier M2) 2070 : Kind(Kind), M1(M1), M2(M2) {} 2071 }; 2072 } // namespace 2073 2074 bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { 2075 // Emit the loop iteration variable. 2076 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2077 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2078 EmitVarDecl(*IVDecl); 2079 2080 // Emit the iterations count variable. 2081 // If it is not a variable, Sema decided to calculate iterations count on each 2082 // iteration (e.g., it is foldable into a constant). 2083 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2084 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2085 // Emit calculation of the iterations count. 2086 EmitIgnoredExpr(S.getCalcLastIteration()); 2087 } 2088 2089 auto &RT = CGM.getOpenMPRuntime(); 2090 2091 bool HasLastprivateClause; 2092 // Check pre-condition. 2093 { 2094 OMPLoopScope PreInitScope(*this, S); 2095 // Skip the entire loop if we don't meet the precondition. 2096 // If the condition constant folds and can be elided, avoid emitting the 2097 // whole loop. 2098 bool CondConstant; 2099 llvm::BasicBlock *ContBlock = nullptr; 2100 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2101 if (!CondConstant) 2102 return false; 2103 } else { 2104 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2105 ContBlock = createBasicBlock("omp.precond.end"); 2106 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2107 getProfileCount(&S)); 2108 EmitBlock(ThenBlock); 2109 incrementProfileCounter(&S); 2110 } 2111 2112 bool Ordered = false; 2113 if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 2114 if (OrderedClause->getNumForLoops()) 2115 RT.emitDoacrossInit(*this, S); 2116 else 2117 Ordered = true; 2118 } 2119 2120 llvm::DenseSet<const Expr *> EmittedFinals; 2121 emitAlignedClause(*this, S); 2122 EmitOMPLinearClauseInit(S); 2123 // Emit helper vars inits. 
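// LB and UB receive the bounds of the current chunk from the runtime, ST
// the stride to the next chunk, and IL the is-last-iteration flag that
// guards lastprivate/linear finalization below.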
2124 LValue LB = 2125 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); 2126 LValue UB = 2127 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); 2128 LValue ST = 2129 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2130 LValue IL = 2131 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2132 2133 // Emit 'then' code. 2134 { 2135 OMPPrivateScope LoopScope(*this); 2136 if (EmitOMPFirstprivateClause(S, LoopScope)) { 2137 // Emit implicit barrier to synchronize threads and avoid data races on 2138 // initialization of firstprivate variables and post-update of 2139 // lastprivate variables. 2140 CGM.getOpenMPRuntime().emitBarrierCall( 2141 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2142 /*ForceSimpleCall=*/true); 2143 } 2144 EmitOMPPrivateClause(S, LoopScope); 2145 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2146 EmitOMPReductionClauseInit(S, LoopScope); 2147 EmitOMPPrivateLoopCounters(S, LoopScope); 2148 EmitOMPLinearClause(S, LoopScope); 2149 (void)LoopScope.Privatize(); 2150 2151 // Detect the loop schedule kind and chunk. 2152 llvm::Value *Chunk = nullptr; 2153 OpenMPScheduleTy ScheduleKind; 2154 if (auto *C = S.getSingleClause<OMPScheduleClause>()) { 2155 ScheduleKind.Schedule = C->getScheduleKind(); 2156 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2157 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2158 if (const auto *Ch = C->getChunkSize()) { 2159 Chunk = EmitScalarExpr(Ch); 2160 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2161 S.getIterationVariable()->getType(), 2162 S.getLocStart()); 2163 } 2164 } 2165 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2166 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2167 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2168 // If the static schedule kind is specified or if the ordered clause is 2169 // specified, and if no monotonic modifier is specified, the effect will 2170 // be as if the monotonic modifier was specified. 2171 if (RT.isStaticNonchunked(ScheduleKind.Schedule, 2172 /* Chunked */ Chunk != nullptr) && 2173 !Ordered) { 2174 if (isOpenMPSimdDirective(S.getDirectiveKind())) 2175 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2176 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2177 // When no chunk_size is specified, the iteration space is divided into 2178 // chunks that are approximately equal in size, and at most one chunk is 2179 // distributed to each thread. Note that the size of the chunks is 2180 // unspecified in this case. 2181 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, 2182 IVSize, IVSigned, Ordered, 2183 IL.getAddress(), LB.getAddress(), 2184 UB.getAddress(), ST.getAddress()); 2185 auto LoopExit = 2186 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2187 // UB = min(UB, GlobalUB); 2188 EmitIgnoredExpr(S.getEnsureUpperBound()); 2189 // IV = LB; 2190 EmitIgnoredExpr(S.getInit()); 2191 // while (idx <= UB) { BODY; ++idx; } 2192 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 2193 S.getInc(), 2194 [&S, LoopExit](CodeGenFunction &CGF) { 2195 CGF.EmitOMPLoopBody(S, LoopExit); 2196 CGF.EmitStopPoint(&S); 2197 }, 2198 [](CodeGenFunction &) {}); 2199 EmitBlock(LoopExit.getBlock()); 2200 // Tell the runtime we are done. 
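// The finish call is routed through OMPCancelStack so that the
// cancellation exit path also ends the static workshare properly.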
2201 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2202 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 2203 }; 2204 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2205 } else { 2206 const bool IsMonotonic = 2207 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || 2208 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || 2209 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 2210 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 2211 // Emit the outer loop, which requests its work chunk [LB..UB] from 2212 // runtime and runs the inner loop to process it. 2213 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 2214 LB.getAddress(), UB.getAddress(), ST.getAddress(), 2215 IL.getAddress(), Chunk); 2216 } 2217 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2218 EmitOMPSimdFinal(S, 2219 [&](CodeGenFunction &CGF) -> llvm::Value * { 2220 return CGF.Builder.CreateIsNotNull( 2221 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2222 }); 2223 } 2224 EmitOMPReductionClauseFinal(S); 2225 // Emit post-update of the reduction variables if IsLastIter != 0. 2226 emitPostUpdateForReductionClause( 2227 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2228 return CGF.Builder.CreateIsNotNull( 2229 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2230 }); 2231 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2232 if (HasLastprivateClause) 2233 EmitOMPLastprivateClauseFinal( 2234 S, isOpenMPSimdDirective(S.getDirectiveKind()), 2235 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 2236 } 2237 EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2238 return CGF.Builder.CreateIsNotNull( 2239 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2240 }); 2241 // We're now done with the loop, so jump to the continuation block. 2242 if (ContBlock) { 2243 EmitBranch(ContBlock); 2244 EmitBlock(ContBlock, true); 2245 } 2246 } 2247 return HasLastprivateClause; 2248 } 2249 2250 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 2251 bool HasLastprivates = false; 2252 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2253 PrePostActionTy &) { 2254 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 2255 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 2256 }; 2257 { 2258 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2259 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 2260 S.hasCancel()); 2261 } 2262 2263 // Emit an implicit barrier at the end. 2264 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2265 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2266 } 2267 } 2268 2269 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 2270 bool HasLastprivates = false; 2271 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2272 PrePostActionTy &) { 2273 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 2274 }; 2275 { 2276 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2277 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2278 } 2279 2280 // Emit an implicit barrier at the end. 
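// Even with a 'nowait' clause a barrier is still required when lastprivate
// copies must complete before threads leave the region.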
2281 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2282 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2283 } 2284 } 2285 2286 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 2287 const Twine &Name, 2288 llvm::Value *Init = nullptr) { 2289 auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 2290 if (Init) 2291 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); 2292 return LVal; 2293 } 2294 2295 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 2296 auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); 2297 auto *CS = dyn_cast<CompoundStmt>(Stmt); 2298 bool HasLastprivates = false; 2299 auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF, 2300 PrePostActionTy &) { 2301 auto &C = CGF.CGM.getContext(); 2302 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2303 // Emit helper vars inits. 2304 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 2305 CGF.Builder.getInt32(0)); 2306 auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1) 2307 : CGF.Builder.getInt32(0); 2308 LValue UB = 2309 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 2310 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 2311 CGF.Builder.getInt32(1)); 2312 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 2313 CGF.Builder.getInt32(0)); 2314 // Loop counter. 2315 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 2316 OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2317 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 2318 OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2319 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 2320 // Generate condition for loop. 2321 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 2322 OK_Ordinary, S.getLocStart(), 2323 /*fpContractable=*/false); 2324 // Increment for loop counter. 2325 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 2326 S.getLocStart()); 2327 auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) { 2328 // Iterate through all sections and emit a switch construct: 2329 // switch (IV) { 2330 // case 0: 2331 // <SectionStmt[0]>; 2332 // break; 2333 // ... 2334 // case <NumSection> - 1: 2335 // <SectionStmt[<NumSection> - 1]>; 2336 // break; 2337 // } 2338 // .omp.sections.exit: 2339 auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 2340 auto *SwitchStmt = CGF.Builder.CreateSwitch( 2341 CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, 2342 CS == nullptr ? 
1 : CS->size()); 2343 if (CS) { 2344 unsigned CaseNumber = 0; 2345 for (auto *SubStmt : CS->children()) { 2346 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2347 CGF.EmitBlock(CaseBB); 2348 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 2349 CGF.EmitStmt(SubStmt); 2350 CGF.EmitBranch(ExitBB); 2351 ++CaseNumber; 2352 } 2353 } else { 2354 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2355 CGF.EmitBlock(CaseBB); 2356 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 2357 CGF.EmitStmt(Stmt); 2358 CGF.EmitBranch(ExitBB); 2359 } 2360 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 2361 }; 2362 2363 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2364 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 2365 // Emit implicit barrier to synchronize threads and avoid data races on 2366 // initialization of firstprivate variables and post-update of lastprivate 2367 // variables. 2368 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 2369 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2370 /*ForceSimpleCall=*/true); 2371 } 2372 CGF.EmitOMPPrivateClause(S, LoopScope); 2373 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2374 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2375 (void)LoopScope.Privatize(); 2376 2377 // Emit static non-chunked loop. 2378 OpenMPScheduleTy ScheduleKind; 2379 ScheduleKind.Schedule = OMPC_SCHEDULE_static; 2380 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 2381 CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32, 2382 /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(), 2383 UB.getAddress(), ST.getAddress()); 2384 // UB = min(UB, GlobalUB); 2385 auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); 2386 auto *MinUBGlobalUB = CGF.Builder.CreateSelect( 2387 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 2388 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 2389 // IV = LB; 2390 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); 2391 // while (idx <= UB) { BODY; ++idx; } 2392 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, 2393 [](CodeGenFunction &) {}); 2394 // Tell the runtime we are done. 2395 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2396 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 2397 }; 2398 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); 2399 CGF.EmitOMPReductionClauseFinal(S); 2400 // Emit post-update of the reduction variables if IsLastIter != 0. 2401 emitPostUpdateForReductionClause( 2402 CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2403 return CGF.Builder.CreateIsNotNull( 2404 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2405 }); 2406 2407 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2408 if (HasLastprivates) 2409 CGF.EmitOMPLastprivateClauseFinal( 2410 S, /*NoFinals=*/false, 2411 CGF.Builder.CreateIsNotNull( 2412 CGF.EmitLoadOfScalar(IL, S.getLocStart()))); 2413 }; 2414 2415 bool HasCancel = false; 2416 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 2417 HasCancel = OSD->hasCancel(); 2418 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 2419 HasCancel = OPSD->hasCancel(); 2420 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); 2421 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 2422 HasCancel); 2423 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 2424 // clause. Otherwise the barrier will be generated by the codegen for the 2425 // directive. 
2426 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 2427 // Emit implicit barrier to synchronize threads and avoid data races on 2428 // initialization of firstprivate variables. 2429 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2430 OMPD_unknown); 2431 } 2432 } 2433 2434 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 2435 { 2436 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2437 EmitSections(S); 2438 } 2439 // Emit an implicit barrier at the end. 2440 if (!S.getSingleClause<OMPNowaitClause>()) { 2441 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2442 OMPD_sections); 2443 } 2444 } 2445 2446 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 2447 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2448 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2449 }; 2450 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2451 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 2452 S.hasCancel()); 2453 } 2454 2455 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 2456 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 2457 llvm::SmallVector<const Expr *, 8> DestExprs; 2458 llvm::SmallVector<const Expr *, 8> SrcExprs; 2459 llvm::SmallVector<const Expr *, 8> AssignmentOps; 2460 // Check if there are any 'copyprivate' clauses associated with this 2461 // 'single' construct. 2462 // Build a list of copyprivate variables along with helper expressions 2463 // (<source>, <destination>, <destination>=<source> expressions) 2464 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 2465 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 2466 DestExprs.append(C->destination_exprs().begin(), 2467 C->destination_exprs().end()); 2468 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 2469 AssignmentOps.append(C->assignment_ops().begin(), 2470 C->assignment_ops().end()); 2471 } 2472 // Emit code for 'single' region along with 'copyprivate' clauses 2473 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2474 Action.Enter(CGF); 2475 OMPPrivateScope SingleScope(CGF); 2476 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 2477 CGF.EmitOMPPrivateClause(S, SingleScope); 2478 (void)SingleScope.Privatize(); 2479 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2480 }; 2481 { 2482 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2483 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 2484 CopyprivateVars, DestExprs, 2485 SrcExprs, AssignmentOps); 2486 } 2487 // Emit an implicit barrier at the end (to avoid data race on firstprivate 2488 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 2489 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 2490 CGM.getOpenMPRuntime().emitBarrierCall( 2491 *this, S.getLocStart(), 2492 S.getSingleClause<OMPNowaitClause>() ? 
OMPD_unknown : OMPD_single); 2493 } 2494 } 2495 2496 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 2497 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2498 Action.Enter(CGF); 2499 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2500 }; 2501 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2502 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 2503 } 2504 2505 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 2506 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2507 Action.Enter(CGF); 2508 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2509 }; 2510 Expr *Hint = nullptr; 2511 if (auto *HintClause = S.getSingleClause<OMPHintClause>()) 2512 Hint = HintClause->getHint(); 2513 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2514 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 2515 S.getDirectiveName().getAsString(), 2516 CodeGen, S.getLocStart(), Hint); 2517 } 2518 2519 void CodeGenFunction::EmitOMPParallelForDirective( 2520 const OMPParallelForDirective &S) { 2521 // Emit directive as a combined directive that consists of two implicit 2522 // directives: 'parallel' with 'for' directive. 2523 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2524 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); 2525 CGF.EmitOMPWorksharingLoop(S); 2526 }; 2527 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); 2528 } 2529 2530 void CodeGenFunction::EmitOMPParallelForSimdDirective( 2531 const OMPParallelForSimdDirective &S) { 2532 // Emit directive as a combined directive that consists of two implicit 2533 // directives: 'parallel' with 'for' directive. 2534 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2535 CGF.EmitOMPWorksharingLoop(S); 2536 }; 2537 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); 2538 } 2539 2540 void CodeGenFunction::EmitOMPParallelSectionsDirective( 2541 const OMPParallelSectionsDirective &S) { 2542 // Emit directive as a combined directive that consists of two implicit 2543 // directives: 'parallel' with 'sections' directive. 2544 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2545 CGF.EmitSections(S); 2546 }; 2547 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); 2548 } 2549 2550 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, 2551 const RegionCodeGenTy &BodyGen, 2552 const TaskGenTy &TaskGen, 2553 OMPTaskDataTy &Data) { 2554 // Emit outlined function for task construct. 2555 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2556 auto *I = CS->getCapturedDecl()->param_begin(); 2557 auto *PartId = std::next(I); 2558 auto *TaskT = std::next(I, 4); 2559 // Check if the task is final 2560 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 2561 // If the condition constant folds and can be elided, try to avoid emitting 2562 // the condition and the dead arm of the if/else. 2563 auto *Cond = Clause->getCondition(); 2564 bool CondConstant; 2565 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 2566 Data.Final.setInt(CondConstant); 2567 else 2568 Data.Final.setPointer(EvaluateExprAsBool(Cond)); 2569 } else { 2570 // By default the task is not final. 2571 Data.Final.setInt(/*IntVal=*/false); 2572 } 2573 // Check if the task has 'priority' clause. 
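// The priority expression is evaluated and converted to a signed 32-bit
// integer, the representation the runtime expects for task priorities.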
2574 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 2575 auto *Prio = Clause->getPriority(); 2576 Data.Priority.setInt(/*IntVal=*/true); 2577 Data.Priority.setPointer(EmitScalarConversion( 2578 EmitScalarExpr(Prio), Prio->getType(), 2579 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 2580 Prio->getExprLoc())); 2581 } 2582 // The first function argument for tasks is a thread id, the second one is a 2583 // part id (0 for tied tasks, >=0 for untied task). 2584 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 2585 // Get list of private variables. 2586 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 2587 auto IRef = C->varlist_begin(); 2588 for (auto *IInit : C->private_copies()) { 2589 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2590 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2591 Data.PrivateVars.push_back(*IRef); 2592 Data.PrivateCopies.push_back(IInit); 2593 } 2594 ++IRef; 2595 } 2596 } 2597 EmittedAsPrivate.clear(); 2598 // Get list of firstprivate variables. 2599 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 2600 auto IRef = C->varlist_begin(); 2601 auto IElemInitRef = C->inits().begin(); 2602 for (auto *IInit : C->private_copies()) { 2603 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2604 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2605 Data.FirstprivateVars.push_back(*IRef); 2606 Data.FirstprivateCopies.push_back(IInit); 2607 Data.FirstprivateInits.push_back(*IElemInitRef); 2608 } 2609 ++IRef; 2610 ++IElemInitRef; 2611 } 2612 } 2613 // Get list of lastprivate variables (for taskloops). 2614 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 2615 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 2616 auto IRef = C->varlist_begin(); 2617 auto ID = C->destination_exprs().begin(); 2618 for (auto *IInit : C->private_copies()) { 2619 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2620 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2621 Data.LastprivateVars.push_back(*IRef); 2622 Data.LastprivateCopies.push_back(IInit); 2623 } 2624 LastprivateDstsOrigs.insert( 2625 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 2626 cast<DeclRefExpr>(*IRef)}); 2627 ++IRef; 2628 ++ID; 2629 } 2630 } 2631 // Build list of dependences. 2632 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) 2633 for (auto *IRef : C->varlists()) 2634 Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); 2635 auto &&CodeGen = [&Data, CS, &BodyGen, &LastprivateDstsOrigs]( 2636 CodeGenFunction &CGF, PrePostActionTy &Action) { 2637 // Set proper addresses for generated private copies. 2638 OMPPrivateScope Scope(CGF); 2639 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || 2640 !Data.LastprivateVars.empty()) { 2641 auto *CopyFn = CGF.Builder.CreateLoad( 2642 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); 2643 auto *PrivatesPtr = CGF.Builder.CreateLoad( 2644 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); 2645 // Map privates. 
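// The copy function receives the privates block plus one out-pointer per
// private/firstprivate/lastprivate variable; after the call each
// out-pointer holds the address of the matching copy inside the task's
// private data, which is then registered in the private scope below.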
2646 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 2647 llvm::SmallVector<llvm::Value *, 16> CallArgs; 2648 CallArgs.push_back(PrivatesPtr); 2649 for (auto *E : Data.PrivateVars) { 2650 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2651 Address PrivatePtr = CGF.CreateMemTemp( 2652 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); 2653 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2654 CallArgs.push_back(PrivatePtr.getPointer()); 2655 } 2656 for (auto *E : Data.FirstprivateVars) { 2657 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2658 Address PrivatePtr = 2659 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 2660 ".firstpriv.ptr.addr"); 2661 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2662 CallArgs.push_back(PrivatePtr.getPointer()); 2663 } 2664 for (auto *E : Data.LastprivateVars) { 2665 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2666 Address PrivatePtr = 2667 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 2668 ".lastpriv.ptr.addr"); 2669 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2670 CallArgs.push_back(PrivatePtr.getPointer()); 2671 } 2672 CGF.EmitRuntimeCall(CopyFn, CallArgs); 2673 for (auto &&Pair : LastprivateDstsOrigs) { 2674 auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); 2675 DeclRefExpr DRE( 2676 const_cast<VarDecl *>(OrigVD), 2677 /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup( 2678 OrigVD) != nullptr, 2679 Pair.second->getType(), VK_LValue, Pair.second->getExprLoc()); 2680 Scope.addPrivate(Pair.first, [&CGF, &DRE]() { 2681 return CGF.EmitLValue(&DRE).getAddress(); 2682 }); 2683 } 2684 for (auto &&Pair : PrivatePtrs) { 2685 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 2686 CGF.getContext().getDeclAlign(Pair.first)); 2687 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 2688 } 2689 } 2690 (void)Scope.Privatize(); 2691 2692 Action.Enter(CGF); 2693 BodyGen(CGF); 2694 }; 2695 auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 2696 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, 2697 Data.NumberOfParts); 2698 OMPLexicalScope Scope(*this, S); 2699 TaskGen(*this, OutlinedFn, Data); 2700 } 2701 2702 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 2703 // Emit outlined function for task construct. 2704 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2705 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 2706 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 2707 const Expr *IfCond = nullptr; 2708 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2709 if (C->getNameModifier() == OMPD_unknown || 2710 C->getNameModifier() == OMPD_task) { 2711 IfCond = C->getCondition(); 2712 break; 2713 } 2714 } 2715 2716 OMPTaskDataTy Data; 2717 // Check if we should emit tied or untied task. 
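// Tasks are tied by default; only an explicit 'untied' clause makes the
// task untied.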
2718 Data.Tied = !S.getSingleClause<OMPUntiedClause>(); 2719 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 2720 CGF.EmitStmt(CS->getCapturedStmt()); 2721 }; 2722 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 2723 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 2724 const OMPTaskDataTy &Data) { 2725 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn, 2726 SharedsTy, CapturedStruct, IfCond, 2727 Data); 2728 }; 2729 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 2730 } 2731 2732 void CodeGenFunction::EmitOMPTaskyieldDirective( 2733 const OMPTaskyieldDirective &S) { 2734 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); 2735 } 2736 2737 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 2738 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); 2739 } 2740 2741 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 2742 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart()); 2743 } 2744 2745 void CodeGenFunction::EmitOMPTaskgroupDirective( 2746 const OMPTaskgroupDirective &S) { 2747 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2748 Action.Enter(CGF); 2749 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2750 }; 2751 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2752 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); 2753 } 2754 2755 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 2756 CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { 2757 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) { 2758 return llvm::makeArrayRef(FlushClause->varlist_begin(), 2759 FlushClause->varlist_end()); 2760 } 2761 return llvm::None; 2762 }(), S.getLocStart()); 2763 } 2764 2765 void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { 2766 // Emit the loop iteration variable. 2767 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2768 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2769 EmitVarDecl(*IVDecl); 2770 2771 // Emit the iterations count variable. 2772 // If it is not a variable, Sema decided to calculate iterations count on each 2773 // iteration (e.g., it is foldable into a constant). 2774 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2775 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2776 // Emit calculation of the iterations count. 2777 EmitIgnoredExpr(S.getCalcLastIteration()); 2778 } 2779 2780 auto &RT = CGM.getOpenMPRuntime(); 2781 2782 bool HasLastprivateClause = false; 2783 // Check pre-condition. 2784 { 2785 OMPLoopScope PreInitScope(*this, S); 2786 // Skip the entire loop if we don't meet the precondition. 2787 // If the condition constant folds and can be elided, avoid emitting the 2788 // whole loop. 2789 bool CondConstant; 2790 llvm::BasicBlock *ContBlock = nullptr; 2791 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2792 if (!CondConstant) 2793 return; 2794 } else { 2795 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2796 ContBlock = createBasicBlock("omp.precond.end"); 2797 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2798 getProfileCount(&S)); 2799 EmitBlock(ThenBlock); 2800 incrementProfileCounter(&S); 2801 } 2802 2803 // Emit 'then' code. 2804 { 2805 // Emit helper vars inits. 
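// Same LB/UB/ST/IL helper-variable protocol as in EmitOMPWorksharingLoop
// above.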
2806 LValue LB = 2807 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); 2808 LValue UB = 2809 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); 2810 LValue ST = 2811 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2812 LValue IL = 2813 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2814 2815 OMPPrivateScope LoopScope(*this); 2816 if (EmitOMPFirstprivateClause(S, LoopScope)) { 2817 // Emit implicit barrier to synchronize threads and avoid data races on 2818 // initialization of firstprivate variables and post-update of 2819 // lastprivate variables. 2820 CGM.getOpenMPRuntime().emitBarrierCall( 2821 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2822 /*ForceSimpleCall=*/true); 2823 } 2824 EmitOMPPrivateClause(S, LoopScope); 2825 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2826 EmitOMPPrivateLoopCounters(S, LoopScope); 2827 (void)LoopScope.Privatize(); 2828 2829 // Detect the distribute schedule kind and chunk. 2830 llvm::Value *Chunk = nullptr; 2831 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; 2832 if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) { 2833 ScheduleKind = C->getDistScheduleKind(); 2834 if (const auto *Ch = C->getChunkSize()) { 2835 Chunk = EmitScalarExpr(Ch); 2836 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2837 S.getIterationVariable()->getType(), 2838 S.getLocStart()); 2839 } 2840 } 2841 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2842 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2843 2844 // OpenMP [2.10.8, distribute Construct, Description] 2845 // If dist_schedule is specified, kind must be static. If specified, 2846 // iterations are divided into chunks of size chunk_size, chunks are 2847 // assigned to the teams of the league in a round-robin fashion in the 2848 // order of the team number. When no chunk_size is specified, the 2849 // iteration space is divided into chunks that are approximately equal 2850 // in size, and at most one chunk is distributed to each team of the 2851 // league. The size of the chunks is unspecified in this case. 2852 if (RT.isStaticNonchunked(ScheduleKind, 2853 /* Chunked */ Chunk != nullptr)) { 2854 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, 2855 IVSize, IVSigned, /* Ordered = */ false, 2856 IL.getAddress(), LB.getAddress(), 2857 UB.getAddress(), ST.getAddress()); 2858 auto LoopExit = 2859 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2860 // UB = min(UB, GlobalUB); 2861 EmitIgnoredExpr(S.getEnsureUpperBound()); 2862 // IV = LB; 2863 EmitIgnoredExpr(S.getInit()); 2864 // while (idx <= UB) { BODY; ++idx; } 2865 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 2866 S.getInc(), 2867 [&S, LoopExit](CodeGenFunction &CGF) { 2868 CGF.EmitOMPLoopBody(S, LoopExit); 2869 CGF.EmitStopPoint(&S); 2870 }, 2871 [](CodeGenFunction &) {}); 2872 EmitBlock(LoopExit.getBlock()); 2873 // Tell the runtime we are done. 2874 RT.emitForStaticFinish(*this, S.getLocStart()); 2875 } else { 2876 // Emit the outer loop, which requests its work chunk [LB..UB] from 2877 // runtime and runs the inner loop to process it. 2878 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, 2879 LB.getAddress(), UB.getAddress(), ST.getAddress(), 2880 IL.getAddress(), Chunk); 2881 } 2882 2883 // Emit final copy of the lastprivate variables if IsLastIter != 0. 
static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
  Fn->addFnAttr(llvm::Attribute::NoInline);
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (!S.getAssociatedStmt()) {
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    if (C) {
      auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
      CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
}
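// Illustrative note (an assumption about typical output, not authoritative):
// with the simd clause, e.g.
//
//   #pragma omp ordered simd
//   { body(); }
//
// the body is outlined into a noinline helper so the vectorizer sees a
// single opaque call, whereas the plain form
//
//   #pragma omp ordered
//
// emits the body inline between the runtime's ordered-region entry/exit
// calls (emitOrderedRegion).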
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
                                        Loc)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                              DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  } else {
    CGF.EmitAtomicStore(RVal, LVal,
                        IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                 : llvm::AtomicOrdering::Monotonic,
                        LVal.isVolatile(), /*IsInit=*/false);
  }
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(
                         XLValue, Loc,
                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                  : llvm::AtomicOrdering::Monotonic,
                         XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
}

static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
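// A rough sketch of the expected lowering (assumed, for orientation only):
//
//   #pragma omp atomic read
//   v = x;       // -> %1 = load atomic i32, i32* %x monotonic; store %1, %v
//
//   #pragma omp atomic write seq_cst
//   x = expr;    // -> store atomic i32 %expr, i32* %x seq_cst
//                //    plus an implicit flush for seq_cst (see above)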
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomics are supported for the given
  // type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    // No atomicrmw equivalent exists; fall back to compare-and-swap.
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
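// Hedged example of the fast path above (assumed IR shape, not verified
// against a specific build): for
//
//   #pragma omp atomic update
//   x += n;
//
// with integer 'x', the update maps directly onto
//
//   atomicrmw add i32* %x, i32 %n monotonic
//
// while forms like 'x = n - x' (x on the RHS of a non-commutative operator)
// return false here and are handled by the compare-and-swap path instead.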
std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
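// For orientation (a sketch, not authoritative): when the update cannot be
// expressed as a single atomicrmw, e.g.
//
//   #pragma omp atomic update
//   x = expr * x;
//
// EmitAtomicUpdate builds the canonical compare-and-swap loop:
//
//   old = load x
//   do {
//     desired = expr * old;          // CommonGen evaluated with old 'x'
//   } while (!cmpxchg(x, old, desired));  // 'old' refreshed on failure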
static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval binop expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr;
    // x = expr Op x; -> expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide the new value, so re-evaluate the
        // update expression using the old value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
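// A hedged illustration of the capture forms handled above (semantics as in
// OpenMP 4.5, sec. 2.13.6):
//
//   #pragma omp atomic capture        // postfix: v gets the OLD x
//   { v = x; x = x + 1; }
//
//   #pragma omp atomic capture        // prefix: v gets the NEW x
//   { x = x + 1; v = x; }
//
// When the update folds into atomicrmw, the instruction returns the old
// value; the new value for the prefix form is recomputed from it.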
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find the first clause (skip the seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
    enterFullExpression(EWC);
  }
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const auto *C : Compound->body()) {
      if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
        enterFullExpression(EWC);
      }
    }
  }

  auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.EmitStopPoint(CS);
    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
                      S.getV(), S.getExpr(), S.getUpdateExpr(),
                      S.isXLHSInRHSPart(), S.getLocStart());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;
  const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());

  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);

  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>()) {
    IfCond = C->getCondition();
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
    Device = C->getDevice();
  }

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S);
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        CapturedVars);
}

static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();

  Action.Enter(CGF);
  CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
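// Hedged sketch of the host-side lowering (an assumed shape of the
// libomptarget interface, for orientation only):
//
//   #pragma omp target if(n > 100) device(dev)
//   { ... }
//
// becomes, roughly,
//
//   if (n > 100 && __tgt_target(dev, @.omp_offload.entry, ...) == 0)
//     ; // offload succeeded on the device
//   else
//     call @outlined.host.region(...)  // host fallback
//
// i.e. the same outlined region is kept as a host fallback for when
// offloading is disabled (constant-false if clause, no target triples) or
// fails at runtime.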
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOrTeamsOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const OMPTeamsDirective &TD = *cast<OMPTeamsDirective>(&S);
  const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>();
  const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getLocStart());
  }

  OMPLexicalScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
                                           CapturedVars);
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
}
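// Illustrative sketch (assumed runtime entry points, not verified here):
//
//   #pragma omp teams num_teams(8) thread_limit(64)
//   { ... }
//
// first pushes the clause values, then forks the teams:
//
//   call void @__kmpc_push_num_teams(%ident_t* @loc, i32 %tid, i32 8, i32 64)
//   call void @__kmpc_fork_teams(%ident_t* @loc, i32 %nargs,
//                                void (...)* @outlined, ...)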
void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for);
  return OMPCancelStack.getExitBlock();
}

void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (auto PvtVarIt : C.private_copies()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get an
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it is initialized from the declaration
      // we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable has served its purpose in the emission
      // of the previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}
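// A hedged example of the usage the privatization above supports (assumed
// user-side code):
//
//   int *p = ...;
//   #pragma omp target data map(buf) use_device_ptr(p)
//   { launch_kernel(p); }  // here 'p' holds the device address of its mapping
//
// The private copy of 'p' is initialized from the void* device address the
// runtime recorded in CaptureDeviceAddrMap.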
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code generation
  // to deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers
      // if needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else
        RCG(CGF);
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inline scope, as changes to the references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
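// Hedged sketch of the resulting structure (assumed libomptarget entry
// points, for orientation only):
//
//   #pragma omp target data map(tofrom: a[0:n]) device(d)
//   { ... }
//
// lowers, roughly, to begin/end mapping calls bracketing the region:
//
//   call void @__tgt_target_data_begin(i64 %d, ...map of a...)
//   ; region body (with use_device_ptr privatization if requested)
//   call void @__tgt_target_data_end(i64 %d, ...map of a...)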
void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  // TODO: codegen for target parallel.
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  // TODO: codegen for target parallel for.
}

/// Map the given OpenMP loop helper variable to the corresponding captured
/// function parameter in the privatization scope.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(
      VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
}
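// For orientation, a hedged example of what the taskloop lowering below
// handles (semantics as in OpenMP 4.5, sec. 2.9.2):
//
//   #pragma omp taskloop grainsize(100)
//   for (int i = 0; i < n; ++i) body(i);
//
// The loop is packaged as a task whose bounds (lb/ub/st) arrive as captured
// parameters; the runtime splits the iteration space into tasks of roughly
// 'grainsize' iterations each (or into 'num_tasks' tasks when that clause is
// used instead).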
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
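// Hedged note (an assumed shape of the standalone data-movement lowering):
//
//   #pragma omp target update to(a[0:n]) if(cond) device(d)
//
// emits, roughly, a guarded call into libomptarget such as
//
//   br i1 %cond, label %update, label %done
// update:
//   call void @__tgt_target_data_update(i64 %d, ...map of a...)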