//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs, which handles correct
/// codegen for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                  bool AsInlined = false, bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (AsInlined) {
      if (S.hasAssociatedStmt()) {
        auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
        for (auto &C : CS->captures()) {
          if (C.capturesVariable() || C.capturesVariableByCopy()) {
            auto *VD = C.getCapturedVar();
            DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                            isCapturedVar(CGF, VD) ||
                                (CGF.CapturedStmtInfo &&
                                 InlinedShareds.isGlobalVarCaptured(VD)),
                            VD->getType().getNonReferenceType(), VK_LValue,
                            SourceLocation());
            InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
              return CGF.EmitLValue(&DRE).getAddress();
            });
          }
        }
        (void)InlinedShareds.Privatize();
      }
    }
  }
};

/// Lexical scope for the OpenMP parallel construct, which handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};
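// Illustrative note (not part of the original source): clauses such as
// 'num_threads(E)' or 'if(E)' may capture their expression E in a pre-init
// declaration, which the scopes above emit before the construct body, e.g.:
//
//   int n = get_n();
//   #pragma omp parallel num_threads(n + 1)   // 'n + 1' may be captured and
//   { ... }                                   // emitted as a pre-init decl.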
/// Private scope for OpenMP loop-based directives, which supports capturing
/// of expressions used in the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    if (auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

} // namespace

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}
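// Illustrative sketch (not part of the original source): for a VLA such as
// 'double A[n][m]', the loop above emits, roughly:
//
//   %0 = mul nuw i64 %n, %m          ; element count of the VLA
//   %1 = mul nuw i64 %0, 8           ; scaled by sizeof(double)
//
// so the returned llvm::Value is the total size of the type in bytes.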
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV =
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();

      // If the field is not a pointer, we need to save the actual value
      // and load it as a uintptr.
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), SourceLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), SourceLocation());
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references, we need to return the address of the
  // reference instead of the address of the referenced value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
  }

  return TmpAddr;
}
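// Illustrative sketch (not part of the original source): a scalar captured
// by copy, e.g.
//
//   float x = 1.0f;
//   #pragma omp parallel firstprivate(x)
//
// is passed to the outlined function as a uintptr-sized argument: the float
// is stored through a float-typed view of a temporary and reloaded as a
// uintptr above, and castValueFromUintptr() recovers a float-typed address
// from that argument inside the outlined body.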
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  const RecordDecl *RD = S.getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  // Build the argument list.
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList Args;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value cast to uintptr.
    // This is necessary given that the runtime library is only able to deal
    // with pointers. The VLA type sizes can be passed to the outlined function
    // in the same way.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType())
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &getContext().Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &getContext().Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType()) {
      bool IsReference = ArgType->isLValueReferenceType();
      ArgType =
          getContext().getCanonicalParamType(ArgType.getNonReferenceType());
      if (IsReference && !ArgType->isPointerType()) {
        ArgType = getContext().getLValueReferenceType(
            ArgType, /*SpelledAsLValue=*/false);
      }
    }
    Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
                                             FD->getLocation(), II, ArgType));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F = llvm::Function::Create(
      FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
      CapturedStmtInfo->getHelperName(), &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // Generate the function.
  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    // If we are capturing a pointer by copy we don't need to do anything;
    // just use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      Address LocalAddr = GetAddrOfLocalVar(Args[Cnt]);
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CreateMemTemp(CurVD->getType(), getPointerAlign(),
                                        ".materialized_ref");
        EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, /*Volatile=*/false,
                          CurVD->getType());
        LocalAddr = RefAddr;
      }
      setAddrOfLocalVar(CurVD, LocalAddr);
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal =
        MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
                       AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      LValue CastedArgLVal =
          MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
                                              Args[Cnt]->getName(), ArgLVal),
                         FD->getType(), AlignmentSource::Decl);
      auto *ExprArg =
          EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizeMap[VAT->getSizeExpr()] = ExprArg;
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = EmitLoadOfReference(
              ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
        } else if (!VarTy->isVariablyModifiedType() ||
                   !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      setAddrOfLocalVar(
          Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(),
                                                  Args[Cnt]->getName(), ArgLVal,
                                                  VarTy->isReferenceType()));
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue =
          EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
    }
    ++Cnt;
    ++I;
  }

  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());

  return F;
}

//===----------------------------------------------------------------------===//
//                           OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
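// Illustrative sketch (not part of the original source): the element-wise
// copy above produces a guarded do-while loop of the shape
//
//   if (dest.begin == dest.end) goto omp.arraycpy.done;
//   omp.arraycpy.body:
//     ; phi nodes select the current src/dest element
//     <CopyGen(dest.element, src.element)>
//     dest.element += 1; src.element += 1;
//     if (dest.element != dest.end) goto omp.arraycpy.body;
//   omp.arraycpy.done:
//
// with CopyGen instantiated per element by the callers below.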
/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for the reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    auto *CE = cast<CallExpr>(InitOp);
    auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() -> Address { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() -> Address { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, ".init");
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// \brief Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of the array.
/// \param Init Initial expression of the array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, const Expr *Init,
                                 Address SrcAddr = Address::invalid()) {
  auto *DRD = getReductionInit(Init);
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (DRD && (DRD->getInitializer() || !Init)) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
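// Illustrative sketch (not part of the original source): for a user-defined
// reduction with an initializer clause, e.g.
//
//   #pragma omp declare reduction(merge : T : omp_out = f(omp_out, omp_in)) \
//       initializer(omp_priv = T())
//
// EmitOMPAggregateInit() runs the 'initializer' expression once per array
// element of the private copy; without a UDR, it emits the default or copy
// initialization for each element instead.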
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types, perform element-by-element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with a single array element, so we have to remap the
            // destination and source variables to the corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}
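// Illustrative sketch (not part of the original source): for a trivially
// assignable array such as 'int a[8]', the fast path above lowers the copy
// to a single aggregate assignment (a memcpy); for an array of class type
// with a user-defined operator=, the element-wise path emits the arraycpy
// loop and evaluates the assignment expression once per element.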
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *CapFD = CapturesInfo.lookup(OrigVD);
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for a single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap the temp VDInit variable to the address of the original
            // variable (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}
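// Illustrative sketch (not part of the original source): given
//
//   int a = 1, b = 2;
//   #pragma omp parallel firstprivate(a) private(b)
//
// the helpers above allocate local copies inside the outlined region,
// initialize 'a' from the original variable (copy init) and leave 'b'
// default-initialized; subsequent references are remapped through
// OMPPrivateScope to the private allocas.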
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If it
          // is, there is no need to copy the data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of the copying procedure for the non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
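// Illustrative sketch (not part of the original source): for
//
//   static int tp;
//   #pragma omp threadprivate(tp)
//   #pragma omp parallel copyin(tp)
//
// the emitted code compares the master thread's address of 'tp' with the
// current thread's address; only when they differ (i.e. on non-master
// threads) does control enter the 'copyin.not.master' block that performs
// the copy.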
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    // Taskloops do not require additional initialization; it is done in the
    // runtime support library.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for the future update at
      // the end of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
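// Illustrative sketch (not part of the original source): for
//
//   #pragma omp for lastprivate(x)
//   for (int i = 0; i < n; ++i) x = f(i);
//
// the init above creates a private copy of 'x'; after the loop,
// EmitOMPLastprivateClauseFinal() (below) copies the private value back to
// the original 'x', guarded by the "is last iteration" condition.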
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLV.getPointer()->getType();
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLV.getAlignment());
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (auto *PtrTy = BaseTy->getAs<PointerType>())
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    else {
      BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
                                             BaseTy->castAs<ReferenceType>());
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      Address(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
          BaseLV.getAlignment()),
      BaseLV.getType(), BaseLV.getAlignmentSource());
}
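// Illustrative sketch (not part of the original source): loadToBegin() and
// castToBase() cooperate when a reduction item reaches its elements through
// a pointer chain, e.g. 'int **p;' with 'reduction(+ : p[0][2:4])':
// loadToBegin() dereferences through the pointers/references until the
// element type is reached, and castToBase() rebuilds a matching chain of
// temporaries so the privatized address can stand in for the original base
// variable inside the region.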
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    for (auto IRef : C->varlists()) {
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
      auto *DRD = getReductionInit(*IRed);
      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
          Base = TempOASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
        auto OASELValueUB =
            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue =
            loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
                        OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [OASELValueLB]() -> Address {
          return OASELValueLB.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
                     OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit VarDecl with copy init for arrays.
              // Get the address of the original variable captured in the
              // current captured region.
              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
                                                 OASELValueLB.getPointer());
              Size = Builder.CreateNUWAdd(
                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(Size));
              EmitVariablyModifiedType(PrivateVD->getType());
              auto Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              auto *Init = PrivateVD->getInit();
              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                   DRD ? *IRed : Init,
                                   OASELValueLB.getAddress());
              EmitAutoVarCleanups(Emission);
              // Emit private VarDecl with reduction init.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   OASELValueLB.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(),
                                OASELValueLB.getType(), OriginalBaseLValue,
                                Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto ASELValue = EmitLValue(ASE);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue = loadToBegin(
            *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(
            LHSVD, [ASELValue]() -> Address { return ASELValue.getAddress(); });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
                     OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit private VarDecl with reduction init.
              AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                 ASELValue.getAddress(),
                                                 ASELValue.getType());
              } else
                EmitAutoVarInit(Emission);
              EmitAutoVarCleanups(Emission);
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   ASELValue.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
                                OriginalBaseLValue, Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
          return Builder.CreateElementBitCast(
              GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
              "rhs.begin");
        });
      } else {
        auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        if (getContext().getAsArrayType(Type)) {
          // Store the address of the original variable associated with the
          // LHS implicit variable.
          DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          IRef->getType(), VK_LValue, IRef->getExprLoc());
          Address OriginalAddr = EmitLValue(&DRE).getAddress();
          PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
                                          LHSVD]() -> Address {
            OriginalAddr = Builder.CreateElementBitCast(
                OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
            return OriginalAddr;
          });
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            if (Type->isVariablyModifiedType()) {
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(
                      getTypeSize(OrigVD->getType().getNonReferenceType())));
              EmitVariablyModifiedType(Type);
            }
            auto Emission = EmitAutoVarAlloca(*PrivateVD);
            auto Addr = Emission.getAllocatedAddress();
            auto *Init = PrivateVD->getInit();
            EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                 DRD ? *IRed : Init, OriginalAddr);
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
            return Builder.CreateElementBitCast(
                GetAddrOfLocalVar(PrivateVD),
                ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
          });
        } else {
          // Store the address of the original variable associated with the
          // LHS implicit variable.
          Address OriginalAddr = Address::invalid();
          PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
                                          &OriginalAddr]() -> Address {
            DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                            CapturedStmtInfo->lookup(OrigVD) != nullptr,
                            IRef->getType(), VK_LValue, IRef->getExprLoc());
            OriginalAddr = EmitLValue(&DRE).getAddress();
            return OriginalAddr;
          });
          // Emit reduction copy.
          bool IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
                // Emit private VarDecl with reduction init.
                AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
                auto Addr = Emission.getAllocatedAddress();
                if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                  emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                   OriginalAddr,
                                                   PrivateVD->getType());
                } else
                  EmitAutoVarInit(Emission);
                EmitAutoVarCleanups(Emission);
                return Addr;
              });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
            return GetAddrOfLocalVar(PrivateVD);
          });
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
      ++IRed;
    }
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    // Emit nowait reduction if the nowait clause is present or if the
    // directive is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        D.getSingleClause<OMPNowaitClause>() ||
            isOpenMPParallelDirective(D.getDirectiveKind()) ||
            D.getDirectiveKind() == OMPD_simd,
        D.getDirectiveKind() == OMPD_simd);
  }
}
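// Illustrative sketch (not part of the original source): for
//
//   int s = 0;
//   #pragma omp parallel reduction(+ : s)
//   { s += work(); }
//
// each thread accumulates into the private copy set up by
// EmitOMPReductionClauseInit(); EmitOMPReductionClauseFinal() then asks the
// runtime (via emitReduction) to combine the private copies into the
// original 's' using the clause's reduction operation.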
static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (auto *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit a conditional
          // block if it was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                           const OMPExecutableDirective &S,
                                           OpenMPDirectiveKind InnermostKind,
                                           const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize the threads and avoid data
      // races when propagating the master thread's values of threadprivate
      // variables to the local instances of those variables in all other
      // implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
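// Illustrative sketch (not part of the original source): a '#pragma omp
// parallel' region is outlined into a helper function and, with the libomp
// runtime, dispatched roughly as
//
//   __kmpc_fork_call(&loc, <num captured vars>, .omp_outlined., <vars...>);
//
// with emitNumThreadsClause()/emitProcBindClause() emitting the matching
// runtime calls before the fork when those clauses are present.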
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values for the current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *U : C->updates())
      EmitIgnoredExpr(U);
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage the loop exit.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
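// Illustrative sketch (not part of the original source): EmitOMPInnerLoop()
// produces the canonical normalized-loop CFG
//
//   omp.inner.for.cond:  if (<LoopCond>) goto body; else goto end;
//   omp.inner.for.body:  <BodyGen>
//   omp.inner.for.inc:   <IncExpr>; <PostIncGen>; goto cond;
//   omp.inner.for.end:
//
// where the condition and increment operate on the directive's iteration
// variable rather than on the user's original loop counter.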
void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return;
  // Emit inits for the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
}

void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit a conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}
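// Illustrative sketch (not part of the original source): for
//
//   #pragma omp simd aligned(p : 32)
//
// emitAlignedClause() evaluates 'p' and emits an alignment assumption (an
// llvm.assume on the pointer) so the vectorizer may generate aligned loads
// and stores; with no explicit alignment, the target's default SIMD
// alignment for the pointee type is used.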
      if (!LocalDeclMap.count(PrivateVD)) {
        auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
        EmitAutoVarCleanups(VarEmission);
      }
      DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      (*I)->getType(), VK_LValue, (*I)->getExprLoc());
      return EmitLValue(&DRE).getAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    }
    ++I;
  }
}

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (auto I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Check that the loop is executed at least once.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else
        EmitVarDecl(*PrivateVD);
      ++CurPrivate;
    }
  }
}

static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
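    // For example, 'safelen(8)' only guarantees that iterations at distance
    // less than 8 are independent, so the loop must not be marked fully
    // parallel in that case.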
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(false);
  }
}

void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process simdlen/safelen.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}

void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED)
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      else {
        DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPLoopScope PreInitScope(CGF, S);
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
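    // For instance, for a loop like 'for (int i = 0; i < 0; ++i)' the
    // precondition folds to false and nothing is emitted for the directive
    // body at all.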
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iteration count variable.
    // If it is not a variable, Sema decided to calculate the iteration count
    // on each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iteration count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    CGF.EmitOMPLinearClauseInit(S);
    {
      OMPPrivateScope LoopScope(CGF);
      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
      CGF.EmitOMPLinearClause(S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      bool HasLastprivateClause =
          CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      CGF.EmitOMPSimdFinal(
          S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
      // Emit final copy of the lastprivate variables at the end of loops.
      if (HasLastprivateClause)
        CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
      CGF.EmitOMPReductionClauseFinal(S);
      emitPostUpdateForReductionClause(
          CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    }
    CGF.EmitOMPLinearClauseFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, bool Ordered, Address LB, Address UB,
    Address ST, Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
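  // The emitted control flow, using the block names created below, is
  // roughly: omp.dispatch.cond -> omp.dispatch.body -> omp.dispatch.inc ->
  // back to omp.dispatch.cond, exiting to omp.dispatch.end (possibly via an
  // omp.dispatch.cleanup block when cleanups are required).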
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond());
  } else {
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL,
                                 LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in the case of a static schedule, the new LB for the
  // loop condition has already been calculated and emitted above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without an ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();
  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                   [&S, LoopExit](CodeGenFunction &CGF) {
                     CGF.EmitOMPLoopBody(S, LoopExit);
                     CGF.EmitStopPoint(&S);
                   },
                   [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
                     if (Ordered) {
                       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
                           CGF, Loc, IVSize, IVSigned);
                     }
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}

void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
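  // As a sketch (the runtime entry-point names follow the usual KMP
  // convention and are emitted by CGOpenMPRuntime, not spelled out here):
  //   #pragma omp for schedule(dynamic, 4)
  // initializes dispatch via __kmpc_dispatch_init_4 and then pulls chunks
  // with __kmpc_dispatch_next_4 in the outer-loop condition emitted above.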
  const bool DynamicOrOrdered =
      Ordered || RT.isDynamic(ScheduleKind.Schedule);

  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 /*Chunked=*/Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) {
  //     BODY;
  //     ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the threads
  // in the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
    RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
                           IVSigned, Ordered, UBVal, Chunk);
  } else {
    RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                         Ordered, IL, LB, UB, ST, Chunk);
  }

  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB,
                   ST, IL, Chunk);
}

void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind,
    const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {

  auto &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
  // dynamic.
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                              IVSize, IVSigned, /*Ordered=*/false,
                              IL, LB, UB, ST, Chunk);

  EmitOMPOuterLoop(/*DynamicOrOrdered=*/false, /*IsMonotonic=*/false,
                   S, LoopScope, /*Ordered=*/false, LB, UB, ST, IL, Chunk);
}

void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_distribute_parallel_for,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        OMPCancelStackRAII CancelRegion(CGF, OMPD_distribute_parallel_for,
                                        /*HasCancel=*/false);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_distribute_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_distribute_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_teams_distribute,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_teams_distribute_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_teams_distribute_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_teams_distribute_parallel_for,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams_distribute,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams_distribute_parallel_for,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
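        // Like the other combined target/teams directives above, this is
        // currently emitted by simply inlining the captured statement; no
        // specialized codegen is performed here yet.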
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams_distribute_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams_distribute_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

/// \brief Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace

bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iteration count variable.
  // If it is not a variable, Sema decided to calculate the iteration count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iteration count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    bool Ordered = false;
    if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S);
      else
        Ordered = true;
    }

    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.
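    // (LB and UB hold the bounds of the chunk assigned to this thread, ST the
    // stride to the next chunk, and IL the "is last iteration" flag that
    // guards the lastprivate copy-back below.)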
    LValue LB =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    LValue UB =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      if (RT.isStaticNonchunked(ScheduleKind.Schedule,
                                /*Chunked=*/Chunk != nullptr) &&
          !Ordered) {
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided
        // into chunks that are approximately equal in size, and at most one
        // chunk is distributed to each thread. Note that the size of the
        // chunks is unspecified in this case.
        RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
                             IVSize, IVSigned, Ordered,
                             IL.getAddress(), LB.getAddress(),
                             UB.getAddress(), ST.getAddress());
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
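        // (The matching runtime call here is the static-loop finalization,
        // __kmpc_for_static_fini in the KMP runtime, paired with the
        // __kmpc_for_static_init call emitted above.)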
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        const bool IsMonotonic =
            Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
            ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
            ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LB.getAddress(), UB.getAddress(), ST.getAddress(),
                            IL.getAddress(), Chunk);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S,
                         [&](CodeGenFunction &CGF) -> llvm::Value * {
                           return CGF.Builder.CreateIsNotNull(
                               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
                         });
      }
      EmitOMPReductionClauseFinal(S);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getLocStart()));
    });
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
  return HasLastprivateClause;
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }

  // Emit an implicit barrier at the end.
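  // Note that even under 'nowait' the barrier is still emitted when a
  // lastprivate clause is present: the final values must be copied back
  // before any thread can safely read them.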
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
  return LVal;
}

void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(),
                        /*fpContractable=*/false);
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      // case 0:
      //   <SectionStmt[0]>;
      //   break;
      // ...
      // case <NumSection> - 1:
      //   <SectionStmt[<NumSection> - 1]>;
      //   break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of
      // lastprivate variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32,
        /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
        UB.getAddress(), ST.getAddress());
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if the 'sections' directive has a
  // 'nowait' clause; otherwise the barrier will be generated by the codegen
  // for the directive.
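  // For example, '#pragma omp sections nowait' with a lastprivate clause
  // still needs the barrier below before the copied-back values may be read
  // by other threads.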
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_sections);
  }
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
                                              S.hasCancel());
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination>=<source> expressions).
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid a data race on the
  // firstprivate initialization) when neither a 'nowait' clause nor a
  // 'copyprivate' clause was specified.
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getLocStart(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  Expr *Hint = nullptr;
  if (auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getLocStart(), Hint);
}

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for simd' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPWorksharingLoop(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitSections(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
}

void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
                                                const RegionCodeGenTy &BodyGen,
                                                const TaskGenTy &TaskGen,
                                                OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  auto *TaskT = std::next(I, 4);
  // Check if the task is final.
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has a 'priority' clause.
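  // For example, '#pragma omp task priority(p)' marks the priority as present
  // and stores the value of 'p' converted to a 32-bit int for the runtime.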
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    auto *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied tasks).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (auto *IRef : C->varlists())
      Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
  auto &&CodeGen = [&Data, CS, &BodyGen, &LastprivateDstsOrigs](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      auto *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
      // Map privates.
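      // (The outlined task function receives, among its parameters, a pointer
      // to the block of private copies and a "copy" helper; the call to that
      // helper below fills the temporary slots in CallArgs with the address
      // of each private copy inside the task.)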
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (auto *E : Data.PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.LastprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.EmitRuntimeCall(CopyFn, CallArgs);
      for (auto &&Pair : LastprivateDstsOrigs) {
        auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    (void)Scope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
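  // For example, '#pragma omp task untied' clears Data.Tied below, which is
  // then forwarded to the runtime when the task is created.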
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iteration count variable.
  // If it is not a variable, Sema decided to calculate the iteration count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iteration count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /*Chunked=*/Chunk != nullptr)) {
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                                    IVSize, IVSigned, /*Ordered=*/false,
                                    IL.getAddress(), LB.getAddress(),
                                    UB.getAddress(), ST.getAddress());
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope,
                                   LB.getAddress(), UB.getAddress(),
                                   ST.getAddress(), IL.getAddress(), Chunk);
      }

      // Emit final copy of the lastprivate variables if IsLastIter != 0.
2900 if (HasLastprivateClause) 2901 EmitOMPLastprivateClauseFinal( 2902 S, /*NoFinals=*/false, 2903 Builder.CreateIsNotNull( 2904 EmitLoadOfScalar(IL, S.getLocStart()))); 2905 } 2906 2907 // We're now done with the loop, so jump to the continuation block. 2908 if (ContBlock) { 2909 EmitBranch(ContBlock); 2910 EmitBlock(ContBlock, true); 2911 } 2912 } 2913 } 2914 2915 void CodeGenFunction::EmitOMPDistributeDirective( 2916 const OMPDistributeDirective &S) { 2917 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2918 CGF.EmitOMPDistributeLoop(S); 2919 }; 2920 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2921 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 2922 /*HasCancel=*/false); 2923 } 2924 2925 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 2926 const CapturedStmt *S) { 2927 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 2928 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 2929 CGF.CapturedStmtInfo = &CapStmtInfo; 2930 auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); 2931 Fn->addFnAttr(llvm::Attribute::NoInline); 2932 return Fn; 2933 } 2934 2935 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 2936 if (!S.getAssociatedStmt()) { 2937 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) 2938 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); 2939 return; 2940 } 2941 auto *C = S.getSingleClause<OMPSIMDClause>(); 2942 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, 2943 PrePostActionTy &Action) { 2944 if (C) { 2945 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2946 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 2947 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 2948 auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); 2949 CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); 2950 } else { 2951 Action.Enter(CGF); 2952 CGF.EmitStmt( 2953 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2954 } 2955 }; 2956 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2957 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); 2958 } 2959 2960 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 2961 QualType SrcType, QualType DestType, 2962 SourceLocation Loc) { 2963 assert(CGF.hasScalarEvaluationKind(DestType) && 2964 "DestType must have scalar evaluation kind."); 2965 assert(!Val.isAggregate() && "Must be a scalar or complex."); 2966 return Val.isScalar() 2967 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, 2968 Loc) 2969 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 2970 DestType, Loc); 2971 } 2972 2973 static CodeGenFunction::ComplexPairTy 2974 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 2975 QualType DestType, SourceLocation Loc) { 2976 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 2977 "DestType must have complex evaluation kind."); 2978 CodeGenFunction::ComplexPairTy ComplexVal; 2979 if (Val.isScalar()) { 2980 // Convert the input element to the element type of the complex.
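// E.g. converting a scalar 'double' to '_Complex float': the scalar is
// converted to 'float' and becomes the real part, while the imaginary part
// is zero-initialized below.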
2981 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 2982 auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 2983 DestElementType, Loc); 2984 ComplexVal = CodeGenFunction::ComplexPairTy( 2985 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 2986 } else { 2987 assert(Val.isComplex() && "Must be a scalar or complex."); 2988 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 2989 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 2990 ComplexVal.first = CGF.EmitScalarConversion( 2991 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 2992 ComplexVal.second = CGF.EmitScalarConversion( 2993 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 2994 } 2995 return ComplexVal; 2996 } 2997 2998 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 2999 LValue LVal, RValue RVal) { 3000 if (LVal.isGlobalReg()) { 3001 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 3002 } else { 3003 CGF.EmitAtomicStore(RVal, LVal, 3004 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3005 : llvm::AtomicOrdering::Monotonic, 3006 LVal.isVolatile(), /*IsInit=*/false); 3007 } 3008 } 3009 3010 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, 3011 QualType RValTy, SourceLocation Loc) { 3012 switch (getEvaluationKind(LVal.getType())) { 3013 case TEK_Scalar: 3014 EmitStoreThroughLValue(RValue::get(convertToScalarValue( 3015 *this, RVal, RValTy, LVal.getType(), Loc)), 3016 LVal); 3017 break; 3018 case TEK_Complex: 3019 EmitStoreOfComplex( 3020 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, 3021 /*isInit=*/false); 3022 break; 3023 case TEK_Aggregate: 3024 llvm_unreachable("Must be a scalar or complex."); 3025 } 3026 } 3027 3028 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 3029 const Expr *X, const Expr *V, 3030 SourceLocation Loc) { 3031 // v = x; 3032 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 3033 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 3034 LValue XLValue = CGF.EmitLValue(X); 3035 LValue VLValue = CGF.EmitLValue(V); 3036 RValue Res = XLValue.isGlobalReg() 3037 ? CGF.EmitLoadOfLValue(XLValue, Loc) 3038 : CGF.EmitAtomicLoad( 3039 XLValue, Loc, 3040 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3041 : llvm::AtomicOrdering::Monotonic, 3042 XLValue.isVolatile()); 3043 // OpenMP, 2.12.6, atomic Construct 3044 // Any atomic construct with a seq_cst clause forces the atomically 3045 // performed operation to include an implicit flush operation without a 3046 // list. 3047 if (IsSeqCst) 3048 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3049 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); 3050 } 3051 3052 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 3053 const Expr *X, const Expr *E, 3054 SourceLocation Loc) { 3055 // x = expr; 3056 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 3057 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); 3058 // OpenMP, 2.12.6, atomic Construct 3059 // Any atomic construct with a seq_cst clause forces the atomically 3060 // performed operation to include an implicit flush operation without a 3061 // list. 
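// E.g. '#pragma omp atomic write seq_cst' over 'x = expr;' produces an
// atomic store followed by the runtime flush entry point (__kmpc_flush in
// libomp).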
3062 if (IsSeqCst) 3063 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3064 } 3065 3066 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, 3067 RValue Update, 3068 BinaryOperatorKind BO, 3069 llvm::AtomicOrdering AO, 3070 bool IsXLHSInRHSPart) { 3071 auto &Context = CGF.CGM.getContext(); 3072 // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue 3073 // for the 'x' expression is simple, and atomics are supported for the given 3074 // type on the target platform. 3075 if (BO == BO_Comma || !Update.isScalar() || 3076 !Update.getScalarVal()->getType()->isIntegerTy() || 3077 !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) && 3078 (Update.getScalarVal()->getType() != 3079 X.getAddress().getElementType())) || 3080 !X.getAddress().getElementType()->isIntegerTy() || 3081 !Context.getTargetInfo().hasBuiltinAtomic( 3082 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) 3083 return std::make_pair(false, RValue::get(nullptr)); 3084 3085 llvm::AtomicRMWInst::BinOp RMWOp; 3086 switch (BO) { 3087 case BO_Add: 3088 RMWOp = llvm::AtomicRMWInst::Add; 3089 break; 3090 case BO_Sub: 3091 if (!IsXLHSInRHSPart) 3092 return std::make_pair(false, RValue::get(nullptr)); 3093 RMWOp = llvm::AtomicRMWInst::Sub; 3094 break; 3095 case BO_And: 3096 RMWOp = llvm::AtomicRMWInst::And; 3097 break; 3098 case BO_Or: 3099 RMWOp = llvm::AtomicRMWInst::Or; 3100 break; 3101 case BO_Xor: 3102 RMWOp = llvm::AtomicRMWInst::Xor; 3103 break; 3104 case BO_LT: 3105 RMWOp = X.getType()->hasSignedIntegerRepresentation() 3106 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min 3107 : llvm::AtomicRMWInst::Max) 3108 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin 3109 : llvm::AtomicRMWInst::UMax); 3110 break; 3111 case BO_GT: 3112 RMWOp = X.getType()->hasSignedIntegerRepresentation() 3113 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max 3114 : llvm::AtomicRMWInst::Min) 3115 : (IsXLHSInRHSPart ?
llvm::AtomicRMWInst::UMax 3116 : llvm::AtomicRMWInst::UMin); 3117 break; 3118 case BO_Assign: 3119 RMWOp = llvm::AtomicRMWInst::Xchg; 3120 break; 3121 case BO_Mul: 3122 case BO_Div: 3123 case BO_Rem: 3124 case BO_Shl: 3125 case BO_Shr: 3126 case BO_LAnd: 3127 case BO_LOr: 3128 return std::make_pair(false, RValue::get(nullptr)); 3129 case BO_PtrMemD: 3130 case BO_PtrMemI: 3131 case BO_LE: 3132 case BO_GE: 3133 case BO_EQ: 3134 case BO_NE: 3135 case BO_AddAssign: 3136 case BO_SubAssign: 3137 case BO_AndAssign: 3138 case BO_OrAssign: 3139 case BO_XorAssign: 3140 case BO_MulAssign: 3141 case BO_DivAssign: 3142 case BO_RemAssign: 3143 case BO_ShlAssign: 3144 case BO_ShrAssign: 3145 case BO_Comma: 3146 llvm_unreachable("Unsupported atomic update operation"); 3147 } 3148 auto *UpdateVal = Update.getScalarVal(); 3149 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) { 3150 UpdateVal = CGF.Builder.CreateIntCast( 3151 IC, X.getAddress().getElementType(), 3152 X.getType()->hasSignedIntegerRepresentation()); 3153 } 3154 auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO); 3155 return std::make_pair(true, RValue::get(Res)); 3156 } 3157 3158 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( 3159 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, 3160 llvm::AtomicOrdering AO, SourceLocation Loc, 3161 const llvm::function_ref<RValue(RValue)> &CommonGen) { 3162 // Update expressions are allowed to have the following forms: 3163 // x binop= expr; -> xrval binop expr; 3164 // x++, ++x -> xrval + 1; 3165 // x--, --x -> xrval - 1; 3166 // x = x binop expr; -> xrval binop expr; 3167 // x = expr Op x; -> expr binop xrval; 3168 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart); 3169 if (!Res.first) { 3170 if (X.isGlobalReg()) { 3171 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop 3172 // 'xrval'. 3173 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X); 3174 } else { 3175 // Perform the compare-and-swap procedure. 3176 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified()); 3177 } 3178 } 3179 return Res; 3180 } 3181 3182 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, 3183 const Expr *X, const Expr *E, 3184 const Expr *UE, bool IsXLHSInRHSPart, 3185 SourceLocation Loc) { 3186 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 3187 "Update expr in 'atomic update' must be a binary operator."); 3188 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 3189 // Update expressions are allowed to have the following forms: 3190 // x binop= expr; -> xrval binop expr; 3191 // x++, ++x -> xrval + 1; 3192 // x--, --x -> xrval - 1; 3193 // x = x binop expr; -> xrval binop expr; 3194 // x = expr Op x; -> expr binop xrval; 3195 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); 3196 LValue XLValue = CGF.EmitLValue(X); 3197 RValue ExprRValue = CGF.EmitAnyExpr(E); 3198 auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3199 : llvm::AtomicOrdering::Monotonic; 3200 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 3201 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 3202 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 3203 auto *ERValExpr = IsXLHSInRHSPart ?
RHS : LHS; 3204 auto Gen = 3205 [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue { 3206 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3207 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 3208 return CGF.EmitAnyExpr(UE); 3209 }; 3210 (void)CGF.EmitOMPAtomicSimpleUpdateExpr( 3211 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 3212 // OpenMP, 2.12.6, atomic Construct 3213 // Any atomic construct with a seq_cst clause forces the atomically 3214 // performed operation to include an implicit flush operation without a 3215 // list. 3216 if (IsSeqCst) 3217 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3218 } 3219 3220 static RValue convertToType(CodeGenFunction &CGF, RValue Value, 3221 QualType SourceType, QualType ResType, 3222 SourceLocation Loc) { 3223 switch (CGF.getEvaluationKind(ResType)) { 3224 case TEK_Scalar: 3225 return RValue::get( 3226 convertToScalarValue(CGF, Value, SourceType, ResType, Loc)); 3227 case TEK_Complex: { 3228 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc); 3229 return RValue::getComplex(Res.first, Res.second); 3230 } 3231 case TEK_Aggregate: 3232 break; 3233 } 3234 llvm_unreachable("Must be a scalar or complex."); 3235 } 3236 3237 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, 3238 bool IsPostfixUpdate, const Expr *V, 3239 const Expr *X, const Expr *E, 3240 const Expr *UE, bool IsXLHSInRHSPart, 3241 SourceLocation Loc) { 3242 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue"); 3243 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue"); 3244 RValue NewVVal; 3245 LValue VLValue = CGF.EmitLValue(V); 3246 LValue XLValue = CGF.EmitLValue(X); 3247 RValue ExprRValue = CGF.EmitAnyExpr(E); 3248 auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3249 : llvm::AtomicOrdering::Monotonic; 3250 QualType NewVValType; 3251 if (UE) { 3252 // 'x' is updated with some additional value. 3253 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 3254 "Update expr in 'atomic capture' must be a binary operator."); 3255 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 3256 // Update expressions are allowed to have the following forms: 3257 // x binop= expr; -> xrval binop expr; 3258 // x++, ++x -> xrval + 1; 3259 // x--, --x -> xrval - 1; 3260 // x = x binop expr; -> xrval binop expr; 3261 // x = expr Op x; -> expr binop xrval; 3262 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 3263 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 3264 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 3265 NewVValType = XRValExpr->getType(); 3266 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 3267 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, 3268 IsPostfixUpdate](RValue XRValue) -> RValue { 3269 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3270 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 3271 RValue Res = CGF.EmitAnyExpr(UE); 3272 NewVVal = IsPostfixUpdate ? XRValue : Res; 3273 return Res; 3274 }; 3275 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3276 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 3277 if (Res.first) { 3278 // An 'atomicrmw' instruction was generated. 3279 if (IsPostfixUpdate) { 3280 // Use the old value from 'atomicrmw'. 3281 NewVVal = Res.second; 3282 } else { 3283 // 'atomicrmw' does not provide the new value, so evaluate it using the 3284 // old value of 'x'.
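// Illustration: for '{v = ++x;}' lowered to 'atomicrmw add', the
// instruction returns the old value of 'x', so the value stored into 'v'
// is recomputed below as 'old_x + 1' by re-evaluating the update
// expression over the returned value.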
3285 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3286 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 3287 NewVVal = CGF.EmitAnyExpr(UE); 3288 } 3289 } 3290 } else { 3291 // 'x' is simply rewritten with some 'expr'. 3292 NewVValType = X->getType().getNonReferenceType(); 3293 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 3294 X->getType().getNonReferenceType(), Loc); 3295 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue { 3296 NewVVal = XRValue; 3297 return ExprRValue; 3298 }; 3299 // Try to perform atomicrmw xchg, otherwise simple exchange. 3300 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3301 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 3302 Loc, Gen); 3303 if (Res.first) { 3304 // 'atomicrmw' instruction was generated. 3305 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 3306 } 3307 } 3308 // Emit post-update store to 'v' of old/new 'x' value. 3309 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); 3310 // OpenMP, 2.12.6, atomic Construct 3311 // Any atomic construct with a seq_cst clause forces the atomically 3312 // performed operation to include an implicit flush operation without a 3313 // list. 3314 if (IsSeqCst) 3315 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3316 } 3317 3318 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 3319 bool IsSeqCst, bool IsPostfixUpdate, 3320 const Expr *X, const Expr *V, const Expr *E, 3321 const Expr *UE, bool IsXLHSInRHSPart, 3322 SourceLocation Loc) { 3323 switch (Kind) { 3324 case OMPC_read: 3325 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 3326 break; 3327 case OMPC_write: 3328 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 3329 break; 3330 case OMPC_unknown: 3331 case OMPC_update: 3332 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 3333 break; 3334 case OMPC_capture: 3335 EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 3336 IsXLHSInRHSPart, Loc); 3337 break; 3338 case OMPC_if: 3339 case OMPC_final: 3340 case OMPC_num_threads: 3341 case OMPC_private: 3342 case OMPC_firstprivate: 3343 case OMPC_lastprivate: 3344 case OMPC_reduction: 3345 case OMPC_safelen: 3346 case OMPC_simdlen: 3347 case OMPC_collapse: 3348 case OMPC_default: 3349 case OMPC_seq_cst: 3350 case OMPC_shared: 3351 case OMPC_linear: 3352 case OMPC_aligned: 3353 case OMPC_copyin: 3354 case OMPC_copyprivate: 3355 case OMPC_flush: 3356 case OMPC_proc_bind: 3357 case OMPC_schedule: 3358 case OMPC_ordered: 3359 case OMPC_nowait: 3360 case OMPC_untied: 3361 case OMPC_threadprivate: 3362 case OMPC_depend: 3363 case OMPC_mergeable: 3364 case OMPC_device: 3365 case OMPC_threads: 3366 case OMPC_simd: 3367 case OMPC_map: 3368 case OMPC_num_teams: 3369 case OMPC_thread_limit: 3370 case OMPC_priority: 3371 case OMPC_grainsize: 3372 case OMPC_nogroup: 3373 case OMPC_num_tasks: 3374 case OMPC_hint: 3375 case OMPC_dist_schedule: 3376 case OMPC_defaultmap: 3377 case OMPC_uniform: 3378 case OMPC_to: 3379 case OMPC_from: 3380 case OMPC_use_device_ptr: 3381 case OMPC_is_device_ptr: 3382 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 3383 } 3384 } 3385 3386 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 3387 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); 3388 OpenMPClauseKind Kind = OMPC_unknown; 3389 for (auto *C : S.clauses()) { 3390 // Find first clause (skip seq_cst clause, if it is first). 
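// E.g. for '#pragma omp atomic capture seq_cst' the seq_cst clause is
// skipped here and 'capture' becomes the kind that selects the lowering.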
3391 if (C->getClauseKind() != OMPC_seq_cst) { 3392 Kind = C->getClauseKind(); 3393 break; 3394 } 3395 } 3396 3397 const auto *CS = 3398 S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 3399 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) { 3400 enterFullExpression(EWC); 3401 } 3402 // Processing for statements under 'atomic capture'. 3403 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 3404 for (const auto *C : Compound->body()) { 3405 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) { 3406 enterFullExpression(EWC); 3407 } 3408 } 3409 } 3410 3411 auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, 3412 PrePostActionTy &) { 3413 CGF.EmitStopPoint(CS); 3414 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), 3415 S.getV(), S.getExpr(), S.getUpdateExpr(), 3416 S.isXLHSInRHSPart(), S.getLocStart()); 3417 }; 3418 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3419 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); 3420 } 3421 3422 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, 3423 const OMPExecutableDirective &S, 3424 const RegionCodeGenTy &CodeGen) { 3425 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); 3426 CodeGenModule &CGM = CGF.CGM; 3427 const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); 3428 3429 llvm::Function *Fn = nullptr; 3430 llvm::Constant *FnID = nullptr; 3431 3432 const Expr *IfCond = nullptr; 3433 // Check for the (at most one) 'if' clause associated with the target region. 3434 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3435 if (C->getNameModifier() == OMPD_unknown || 3436 C->getNameModifier() == OMPD_target) { 3437 IfCond = C->getCondition(); 3438 break; 3439 } 3440 } 3441 3442 // Check if we have any device clause associated with the directive. 3443 const Expr *Device = nullptr; 3444 if (auto *C = S.getSingleClause<OMPDeviceClause>()) { 3445 Device = C->getDevice(); 3446 } 3447 3448 // Check if we have an if clause whose conditional always evaluates to false 3449 // or if we do not have any targets specified. If so, the target region is not 3450 // an offload entry point. 3451 bool IsOffloadEntry = true; 3452 if (IfCond) { 3453 bool Val; 3454 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) 3455 IsOffloadEntry = false; 3456 } 3457 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3458 IsOffloadEntry = false; 3459 3460 assert(CGF.CurFuncDecl && "No parent declaration for target region!"); 3461 StringRef ParentName; 3462 // In case we have Ctors/Dtors we use the complete type variant to produce 3463 // the mangling of the device outlined kernel. 3464 if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) 3465 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); 3466 else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) 3467 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); 3468 else 3469 ParentName = 3470 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl))); 3471 3472 // Emit target region as a standalone region.
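// Sketch of the offloading scheme assumed here: the region body is
// outlined into a function registered as an offload entry, and the
// host-side call emitted below goes through the libomptarget entry point
// __tgt_target(device_id, host_ptr, arg_num, ...), falling back to the
// host version of the outlined function if the offload attempt fails.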
3473 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, 3474 IsOffloadEntry, CodeGen); 3475 OMPLexicalScope Scope(CGF, S); 3476 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3477 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 3478 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, 3479 CapturedVars); 3480 } 3481 3482 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, 3483 PrePostActionTy &Action) { 3484 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 3485 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3486 CGF.EmitOMPPrivateClause(S, PrivateScope); 3487 (void)PrivateScope.Privatize(); 3488 3489 Action.Enter(CGF); 3490 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3491 } 3492 3493 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, 3494 StringRef ParentName, 3495 const OMPTargetDirective &S) { 3496 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3497 emitTargetRegion(CGF, S, Action); 3498 }; 3499 llvm::Function *Fn; 3500 llvm::Constant *Addr; 3501 // Emit target region as a standalone region. 3502 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3503 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3504 assert(Fn && Addr && "Target device function emission failed."); 3505 } 3506 3507 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 3508 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3509 emitTargetRegion(CGF, S, Action); 3510 }; 3511 emitCommonOMPTargetDirective(*this, S, CodeGen); 3512 } 3513 3514 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, 3515 const OMPExecutableDirective &S, 3516 OpenMPDirectiveKind InnermostKind, 3517 const RegionCodeGenTy &CodeGen) { 3518 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); 3519 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( 3520 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 3521 3522 const OMPTeamsDirective &TD = *cast<OMPTeamsDirective>(&S); 3523 const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>(); 3524 const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>(); 3525 if (NT || TL) { 3526 Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; 3527 Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; 3528 3529 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, 3530 S.getLocStart()); 3531 } 3532 3533 OMPLexicalScope Scope(CGF, S); 3534 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3535 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3536 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, 3537 CapturedVars); 3538 } 3539 3540 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { 3541 // Emit teams region as a standalone region.
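// Illustration (libomp entry points, stated as an assumption about the
// runtime):
//   #pragma omp teams num_teams(4) thread_limit(8)
// lowers to __kmpc_push_num_teams(&loc, tid, 4, 8) followed by
// __kmpc_fork_teams(&loc, nargs, outlined_fn, <captured vars>).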
3542 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3543 OMPPrivateScope PrivateScope(CGF); 3544 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3545 CGF.EmitOMPPrivateClause(S, PrivateScope); 3546 (void)PrivateScope.Privatize(); 3547 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3548 }; 3549 emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); 3550 } 3551 3552 void CodeGenFunction::EmitOMPCancellationPointDirective( 3553 const OMPCancellationPointDirective &S) { 3554 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), 3555 S.getCancelRegion()); 3556 } 3557 3558 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 3559 const Expr *IfCond = nullptr; 3560 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3561 if (C->getNameModifier() == OMPD_unknown || 3562 C->getNameModifier() == OMPD_cancel) { 3563 IfCond = C->getCondition(); 3564 break; 3565 } 3566 } 3567 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, 3568 S.getCancelRegion()); 3569 } 3570 3571 CodeGenFunction::JumpDest 3572 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 3573 if (Kind == OMPD_parallel || Kind == OMPD_task || 3574 Kind == OMPD_target_parallel) 3575 return ReturnBlock; 3576 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 3577 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || 3578 Kind == OMPD_distribute_parallel_for || 3579 Kind == OMPD_target_parallel_for); 3580 return OMPCancelStack.getExitBlock(); 3581 } 3582 3583 void CodeGenFunction::EmitOMPUseDevicePtrClause( 3584 const OMPClause &NC, OMPPrivateScope &PrivateScope, 3585 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { 3586 const auto &C = cast<OMPUseDevicePtrClause>(NC); 3587 auto OrigVarIt = C.varlist_begin(); 3588 auto InitIt = C.inits().begin(); 3589 for (auto PvtVarIt : C.private_copies()) { 3590 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl()); 3591 auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl()); 3592 auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl()); 3593 3594 // In order to identify the right initializer we need to match the 3595 // declaration used by the mapping logic. In some cases we may get an 3596 // OMPCapturedExprDecl that refers to the original declaration. 3597 const ValueDecl *MatchingVD = OrigVD; 3598 if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) { 3599 // OMPCapturedExprDecls are used to privatize fields of the current 3600 // structure. 3601 auto *ME = cast<MemberExpr>(OED->getInit()); 3602 assert(isa<CXXThisExpr>(ME->getBase()) && 3603 "Base should be the current struct!"); 3604 MatchingVD = ME->getMemberDecl(); 3605 } 3606 3607 // If we don't have information about the current list item, move on to 3608 // the next one. 3609 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD); 3610 if (InitAddrIt == CaptureDeviceAddrMap.end()) 3611 continue; 3612 3613 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 3614 // Initialize the temporary initialization variable with the address we 3615 // get from the runtime library. We have to cast the source address 3616 // because it is always a void *. References are materialized in the 3617 // privatization scope, so the initialization here disregards the fact 3618 // that the original variable is a reference.
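// E.g. for 'use_device_ptr(p)' with 'int *p': the runtime hands back the
// device address in a 'void *' slot, which is re-typed below so that the
// private copy of 'p' can be initialized from it as an 'int *'.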
3619 QualType AddrQTy = 3620 getContext().getPointerType(OrigVD->getType().getNonReferenceType()); 3621 llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy); 3622 Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy); 3623 setAddrOfLocalVar(InitVD, InitAddr); 3624 3625 // Emit the private declaration; it will be initialized by the 3626 // declaration we just added to the local declarations map. 3627 EmitDecl(*PvtVD); 3628 3629 // The initialization variable has served its purpose in the emission 3630 // of the previous declaration, so we don't need it anymore. 3631 LocalDeclMap.erase(InitVD); 3632 3633 // Return the address of the private variable. 3634 return GetAddrOfLocalVar(PvtVD); 3635 }); 3636 assert(IsRegistered && "firstprivate var already registered as private"); 3637 // Silence the warning about unused variable. 3638 (void)IsRegistered; 3639 3640 ++OrigVarIt; 3641 ++InitIt; 3642 } 3643 } 3644 3645 // Generate the instructions for the '#pragma omp target data' directive. 3646 void CodeGenFunction::EmitOMPTargetDataDirective( 3647 const OMPTargetDataDirective &S) { 3648 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true); 3649 3650 // Create a pre/post action to signal the privatization of the device pointer. 3651 // This action can be replaced by the OpenMP runtime code generation to 3652 // deactivate privatization. 3653 bool PrivatizeDevicePointers = false; 3654 class DevicePointerPrivActionTy : public PrePostActionTy { 3655 bool &PrivatizeDevicePointers; 3656 3657 public: 3658 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers) 3659 : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {} 3660 void Enter(CodeGenFunction &CGF) override { 3661 PrivatizeDevicePointers = true; 3662 } 3663 }; 3664 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers); 3665 3666 auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers]( 3667 CodeGenFunction &CGF, PrePostActionTy &Action) { 3668 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3669 CGF.EmitStmt( 3670 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3671 }; 3672 3673 // Codegen that selects whether to generate the privatization code or not. 3674 auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers, 3675 &InnermostCodeGen](CodeGenFunction &CGF, 3676 PrePostActionTy &Action) { 3677 RegionCodeGenTy RCG(InnermostCodeGen); 3678 PrivatizeDevicePointers = false; 3679 3680 // Call the pre-action to change the status of PrivatizeDevicePointers if 3681 // needed. 3682 Action.Enter(CGF); 3683 3684 if (PrivatizeDevicePointers) { 3685 OMPPrivateScope PrivateScope(CGF); 3686 // Emit all instances of the use_device_ptr clause. 3687 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) 3688 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope, 3689 Info.CaptureDeviceAddrMap); 3690 (void)PrivateScope.Privatize(); 3691 RCG(CGF); 3692 } else 3693 RCG(CGF); 3694 }; 3695 3696 // Forward the provided action to the privatization codegen. 3697 RegionCodeGenTy PrivRCG(PrivCodeGen); 3698 PrivRCG.setAction(Action); 3699 3700 // Although the body of the region is emitted as an inlined directive, we 3701 // don't use an inlined scope, because changes to the references inside the 3702 // region are expected to be visible outside, so we do not privatize them.
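// Illustration of the overall shape (assuming the libomptarget interface):
//   #pragma omp target data map(tofrom: a)
// brackets the region emitted below with __tgt_target_data_begin(...) and
// __tgt_target_data_end(...) mapping calls.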
3703 OMPLexicalScope Scope(CGF, S); 3704 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, 3705 PrivRCG); 3706 }; 3707 3708 RegionCodeGenTy RCG(CodeGen); 3709 3710 // If we don't have target devices, don't bother emitting the data mapping 3711 // code. 3712 if (CGM.getLangOpts().OMPTargetTriples.empty()) { 3713 RCG(*this); 3714 return; 3715 } 3716 3717 // Check if we have any if clause associated with the directive. 3718 const Expr *IfCond = nullptr; 3719 if (auto *C = S.getSingleClause<OMPIfClause>()) 3720 IfCond = C->getCondition(); 3721 3722 // Check if we have any device clause associated with the directive. 3723 const Expr *Device = nullptr; 3724 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 3725 Device = C->getDevice(); 3726 3727 // Set the action to signal privatization of device pointers. 3728 RCG.setAction(PrivAction); 3729 3730 // Emit region code. 3731 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG, 3732 Info); 3733 } 3734 3735 void CodeGenFunction::EmitOMPTargetEnterDataDirective( 3736 const OMPTargetEnterDataDirective &S) { 3737 // If we don't have target devices, don't bother emitting the data mapping 3738 // code. 3739 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3740 return; 3741 3742 // Check if we have any if clause associated with the directive. 3743 const Expr *IfCond = nullptr; 3744 if (auto *C = S.getSingleClause<OMPIfClause>()) 3745 IfCond = C->getCondition(); 3746 3747 // Check if we have any device clause associated with the directive. 3748 const Expr *Device = nullptr; 3749 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 3750 Device = C->getDevice(); 3751 3752 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 3753 } 3754 3755 void CodeGenFunction::EmitOMPTargetExitDataDirective( 3756 const OMPTargetExitDataDirective &S) { 3757 // If we don't have target devices, don't bother emitting the data mapping 3758 // code. 3759 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3760 return; 3761 3762 // Check if we have any if clause associated with the directive. 3763 const Expr *IfCond = nullptr; 3764 if (auto *C = S.getSingleClause<OMPIfClause>()) 3765 IfCond = C->getCondition(); 3766 3767 // Check if we have any device clause associated with the directive. 3768 const Expr *Device = nullptr; 3769 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 3770 Device = C->getDevice(); 3771 3772 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 3773 } 3774 3775 static void emitTargetParallelRegion(CodeGenFunction &CGF, 3776 const OMPTargetParallelDirective &S, 3777 PrePostActionTy &Action) { 3778 // Get the captured statement associated with the 'parallel' region. 3779 auto *CS = S.getCapturedStmt(OMPD_parallel); 3780 Action.Enter(CGF); 3781 auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 3782 // TODO: Add support for clauses. 3783 CGF.EmitStmt(CS->getCapturedStmt()); 3784 }; 3785 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen); 3786 } 3787 3788 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 3789 CodeGenModule &CGM, StringRef ParentName, 3790 const OMPTargetParallelDirective &S) { 3791 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3792 emitTargetParallelRegion(CGF, S, Action); 3793 }; 3794 llvm::Function *Fn; 3795 llvm::Constant *Addr; 3796 // Emit target region as a standalone region. 
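// A sketch of the combined construct: for '#pragma omp target parallel'
// the 'parallel' body is outlined once more inside the target function, so
// the device kernel itself forks its threads through the device runtime.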
3797 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3798 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3799 assert(Fn && Addr && "Target device function emission failed."); 3800 } 3801 3802 void CodeGenFunction::EmitOMPTargetParallelDirective( 3803 const OMPTargetParallelDirective &S) { 3804 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3805 emitTargetParallelRegion(CGF, S, Action); 3806 }; 3807 emitCommonOMPTargetDirective(*this, S, CodeGen); 3808 } 3809 3810 void CodeGenFunction::EmitOMPTargetParallelForDirective( 3811 const OMPTargetParallelForDirective &S) { 3812 // TODO: codegen for target parallel for. 3813 } 3814 3815 /// Privatize a loop helper variable by mapping it to the corresponding 3816 /// implicit parameter of the outlined function. 3817 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, 3818 const ImplicitParamDecl *PVD, 3818 CodeGenFunction::OMPPrivateScope &Privates) { 3819 auto *VDecl = cast<VarDecl>(Helper->getDecl()); 3820 Privates.addPrivate( 3821 VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); }); 3822 } 3823 3824 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { 3825 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); 3826 // Emit the outlined function for the task construct. 3827 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 3828 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 3829 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 3830 const Expr *IfCond = nullptr; 3831 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3832 if (C->getNameModifier() == OMPD_unknown || 3833 C->getNameModifier() == OMPD_taskloop) { 3834 IfCond = C->getCondition(); 3835 break; 3836 } 3837 } 3838 3839 OMPTaskDataTy Data; 3840 // Check if the taskloop must be emitted without a taskgroup. 3841 Data.Nogroup = S.getSingleClause<OMPNogroupClause>(); 3842 // TODO: Check if we should emit tied or untied task. 3843 Data.Tied = true; 3844 // Set the scheduling for the taskloop. 3845 if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) { 3846 // grainsize clause 3847 Data.Schedule.setInt(/*IntVal=*/false); 3848 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize())); 3849 } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) { 3850 // num_tasks clause 3851 Data.Schedule.setInt(/*IntVal=*/true); 3852 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks())); 3853 } 3854 3855 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { 3856 // if (PreCond) { 3857 // for (IV in 0..LastIteration) BODY; 3858 // <Final counter/linear vars updates>; 3859 // } 3860 // 3861 3862 // Emit: if (PreCond) - begin. 3863 // If the condition constant folds and can be elided, avoid emitting the 3864 // whole loop. 3865 bool CondConstant; 3866 llvm::BasicBlock *ContBlock = nullptr; 3867 OMPLoopScope PreInitScope(CGF, S); 3868 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 3869 if (!CondConstant) 3870 return; 3871 } else { 3872 auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); 3873 ContBlock = CGF.createBasicBlock("taskloop.if.end"); 3874 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 3875 CGF.getProfileCount(&S)); 3876 CGF.EmitBlock(ThenBlock); 3877 CGF.incrementProfileCounter(&S); 3878 } 3879 3880 if (isOpenMPSimdDirective(S.getDirectiveKind())) 3881 CGF.EmitOMPSimdInit(S); 3882 3883 OMPPrivateScope LoopScope(CGF); 3884 // Emit helper vars inits.
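// Note (based on the captured-decl layout the enum below assumes): the
// taskloop bounds, stride and is-last flag are passed as trailing implicit
// parameters of the outlined task function, starting at index 5; mapParam
// binds the helper variables to those parameters.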
3885 enum { LowerBound = 5, UpperBound, Stride, LastIter }; 3886 auto *I = CS->getCapturedDecl()->param_begin(); 3887 auto *LBP = std::next(I, LowerBound); 3888 auto *UBP = std::next(I, UpperBound); 3889 auto *STP = std::next(I, Stride); 3890 auto *LIP = std::next(I, LastIter); 3891 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP, 3892 LoopScope); 3893 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP, 3894 LoopScope); 3895 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope); 3896 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, 3897 LoopScope); 3898 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 3899 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 3900 (void)LoopScope.Privatize(); 3901 // Emit the loop iteration variable. 3902 const Expr *IVExpr = S.getIterationVariable(); 3903 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 3904 CGF.EmitVarDecl(*IVDecl); 3905 CGF.EmitIgnoredExpr(S.getInit()); 3906 3907 // Emit the iteration count variable. 3908 // If it is not a variable, Sema decided to calculate the iteration count 3909 // on each iteration (e.g., it is foldable into a constant). 3910 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 3911 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 3912 // Emit the calculation of the iteration count. 3913 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 3914 } 3915 3916 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 3917 S.getInc(), 3918 [&S](CodeGenFunction &CGF) { 3919 CGF.EmitOMPLoopBody(S, JumpDest()); 3920 CGF.EmitStopPoint(&S); 3921 }, 3922 [](CodeGenFunction &) {}); 3923 // Emit: if (PreCond) - end. 3924 if (ContBlock) { 3925 CGF.EmitBranch(ContBlock); 3926 CGF.EmitBlock(ContBlock, true); 3927 } 3928 // Emit final copy of the lastprivate variables if IsLastIter != 0. 3929 if (HasLastprivateClause) { 3930 CGF.EmitOMPLastprivateClauseFinal( 3931 S, isOpenMPSimdDirective(S.getDirectiveKind()), 3932 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( 3933 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 3934 (*LIP)->getType(), S.getLocStart()))); 3935 } 3936 }; 3937 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 3938 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 3939 const OMPTaskDataTy &Data) { 3940 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) { 3941 OMPLoopScope PreInitScope(CGF, S); 3942 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S, 3943 OutlinedFn, SharedsTy, 3944 CapturedStruct, IfCond, Data); 3945 }; 3946 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, 3947 CodeGen); 3948 }; 3949 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 3950 } 3951 3952 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 3953 EmitOMPTaskLoopBasedDirective(S); 3954 } 3955 3956 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 3957 const OMPTaskLoopSimdDirective &S) { 3958 EmitOMPTaskLoopBasedDirective(S); 3959 } 3960 3961 // Generate the instructions for the '#pragma omp target update' directive. 3962 void CodeGenFunction::EmitOMPTargetUpdateDirective( 3963 const OMPTargetUpdateDirective &S) { 3964 // If we don't have target devices, don't bother emitting the data mapping 3965 // code. 3966 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3967 return; 3968 3969 // Check if we have any if clause associated with the directive.
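// E.g. '#pragma omp target update if(cond) to(a)' performs the transfer
// only when 'cond' evaluates to true at run time.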
3970 const Expr *IfCond = nullptr; 3971 if (auto *C = S.getSingleClause<OMPIfClause>()) 3972 IfCond = C->getCondition(); 3973 3974 // Check if we have any device clause associated with the directive. 3975 const Expr *Device = nullptr; 3976 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 3977 Device = C->getDevice(); 3978 3979 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 3980 } 3981
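// Illustration (libomptarget interface, stated as an assumption): the
// standalone call above maps to
// __tgt_target_data_update(device_id, arg_num, ...).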