//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                  bool AsInlined = false, bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (AsInlined) {
      if (S.hasAssociatedStmt()) {
        auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
        for (auto &C : CS->captures()) {
          if (C.capturesVariable() || C.capturesVariableByCopy()) {
            auto *VD = C.getCapturedVar();
            DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                            isCapturedVar(CGF, VD) ||
                                (CGF.CapturedStmtInfo &&
                                 InlinedShareds.isGlobalVarCaptured(VD)),
                            VD->getType().getNonReferenceType(), VK_LValue,
                            SourceLocation());
            InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
              return CGF.EmitLValue(&DRE).getAddress();
            });
          }
        }
        (void)InlinedShareds.Privatize();
      }
    }
  }
};

/// Lexical scope for OpenMP parallel construct that handles correct codegen
/// for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};

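// Note that for combined constructs such as 'target parallel' the pre-init
// statements are emitted when the enclosing target region is entered, which
// is why OMPParallelScope (and OMPTeamsScope below) skip them for target
// execution directives.
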
/// Lexical scope for OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of expressions used in the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
        for (const auto *I : PreInits->decls())
          CGF.EmitVarDecl(cast<VarDecl>(*I));
      }
    }
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

} // namespace

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV =
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), SourceLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), SourceLocation());
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references we need to return the address of the
  // reference instead of the reference of the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
  }

  return TmpAddr;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  const RecordDecl *RD = S.getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  // Build the argument list.
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList Args;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library can only deal
    // with pointers. VLA type sizes are passed to the outlined function in the
    // same way.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType())
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &getContext().Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &getContext().Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType()) {
      bool IsReference = ArgType->isLValueReferenceType();
      ArgType =
          getContext().getCanonicalParamType(ArgType.getNonReferenceType());
      if (IsReference && !ArgType->isPointerType()) {
        ArgType = getContext().getLValueReferenceType(
            ArgType, /*SpelledAsLValue=*/false);
      }
    }
    Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
                                             FD->getLocation(), II, ArgType));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

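  // For illustration: if the region captures an 'int n' by copy and a VLA
  // 'double a[n]', the list built above holds (besides the context
  // parameters) a uintptr-typed 'n', a uintptr-typed 'vla' size, and a
  // pointer-typed argument for 'a' itself; the exact set depends on the
  // captures recorded in the CapturedStmt.
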
  // Create the function declaration.
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F = llvm::Function::Create(
      FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
      CapturedStmtInfo->getHelperName(), &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // Generate the function.
  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      Address LocalAddr = GetAddrOfLocalVar(Args[Cnt]);
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CreateMemTemp(CurVD->getType(), getPointerAlign(),
                                        ".materialized_ref");
        EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, /*Volatile=*/false,
                          CurVD->getType());
        LocalAddr = RefAddr;
      }
      setAddrOfLocalVar(CurVD, LocalAddr);
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal =
        MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
                       AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      LValue CastedArgLVal =
          MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
                                              Args[Cnt]->getName(), ArgLVal),
                         FD->getType(), AlignmentSource::Decl);
      auto *ExprArg =
          EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizeMap[VAT->getSizeExpr()] = ExprArg;
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = EmitLoadOfReference(
              ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
        } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      setAddrOfLocalVar(
          Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(),
                                                  Args[Cnt]->getName(), ArgLVal,
                                                  VarTy->isReferenceType()));
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue =
          EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
    }
    ++Cnt;
    ++I;
  }

  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());

  return F;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI =
      Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

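// The loop emitted above has, roughly, the following shape (value and block
// names follow the builder calls in EmitOMPAggregateAssign):
//
//   omp.arraycpy.isempty: skip the loop entirely if dest begin == dest end
//   omp.arraycpy.body:    PHIs over the current src/dest element pointers,
//                         one CopyGen invocation per element
//   omp.arraycpy.done:    reached once the dest end pointer is hit
//
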
/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    auto *CE = cast<CallExpr>(InitOp);
    auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() -> Address { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() -> Address { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, ".init");
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

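// For reference, the user-defined case above corresponds to source such as
//   #pragma omp declare reduction(foo : T : omp_out += omp_in)
//                       initializer(omp_priv = T(0))
// where 'omp_priv' is mapped onto Private and 'omp_orig' onto Original;
// without an initializer clause the private copy is default-initialized from
// the '.init' null constant emitted above.
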
/// \brief Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, const Expr *Init,
                                 Address SrcAddr = Address::invalid()) {
  auto *DRD = getReductionInit(Init);
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (DRD && (DRD->getInitializer() || !Init)) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

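// EmitOMPCopy below chooses between two strategies: a flat aggregate
// assignment (effectively a memcpy) when the copy expression is a simple
// '=' over an array, and an element-by-element loop via
// EmitOMPAggregateAssign when per-element copy semantics are required.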
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element-by-element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *CapFD = CapturesInfo.lookup(OrigVD);
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

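// The copyin handling below corresponds to source such as
//   int x;
//   #pragma omp threadprivate(x)
//   #pragma omp parallel copyin(x)
// where every non-master thread copies the master thread's value of 'x' into
// its own threadprivate instance before the parallel body runs.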
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If it
          // is, there is no need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done by the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLV.getPointer()->getType();
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLV.getAlignment());
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (auto *PtrTy = BaseTy->getAs<PointerType>())
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    else {
      BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
                                             BaseTy->castAs<ReferenceType>());
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      Address(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
          BaseLV.getAlignment()),
      BaseLV.getType(), BaseLV.getAlignmentSource());
}

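// In the reduction initialization below, array sections and array subscripts
// need extra care: the private copy covers only the reduced elements, so the
// section's offset within the original array is computed with pointer
// differences and the original variable is rebased onto the private buffer
// using castToBase/loadToBegin above.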
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    for (auto IRef : C->varlists()) {
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
      auto *DRD = getReductionInit(*IRed);
      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
          Base = TempOASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
        auto OASELValueUB =
            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue =
            loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
                        OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [OASELValueLB]() -> Address {
          return OASELValueLB.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
                     OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit VarDecl with copy init for arrays.
              // Get the address of the original variable captured in current
              // captured region.
              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
                                                 OASELValueLB.getPointer());
              Size = Builder.CreateNUWAdd(
                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(Size));
              EmitVariablyModifiedType(PrivateVD->getType());
              auto Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              auto *Init = PrivateVD->getInit();
              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                   DRD ? *IRed : Init,
                                   OASELValueLB.getAddress());
              EmitAutoVarCleanups(Emission);
              // Emit private VarDecl with reduction init.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   OASELValueLB.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(),
                                OASELValueLB.getType(), OriginalBaseLValue,
                                Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto ASELValue = EmitLValue(ASE);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue = loadToBegin(
            *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(
            LHSVD, [ASELValue]() -> Address { return ASELValue.getAddress(); });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
                     OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit private VarDecl with reduction init.
              AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                 ASELValue.getAddress(),
                                                 ASELValue.getType());
              } else
                EmitAutoVarInit(Emission);
              EmitAutoVarCleanups(Emission);
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   ASELValue.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
                                OriginalBaseLValue, Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
          return Builder.CreateElementBitCast(
              GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
              "rhs.begin");
        });
      } else {
        auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        if (getContext().getAsArrayType(Type)) {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          IRef->getType(), VK_LValue, IRef->getExprLoc());
          Address OriginalAddr = EmitLValue(&DRE).getAddress();
          PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
                                          LHSVD]() -> Address {
            OriginalAddr = Builder.CreateElementBitCast(
                OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
            return OriginalAddr;
          });
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            if (Type->isVariablyModifiedType()) {
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(
                      getTypeSize(OrigVD->getType().getNonReferenceType())));
              EmitVariablyModifiedType(Type);
            }
            auto Emission = EmitAutoVarAlloca(*PrivateVD);
            auto Addr = Emission.getAllocatedAddress();
            auto *Init = PrivateVD->getInit();
            EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                 DRD ? *IRed : Init, OriginalAddr);
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
            return Builder.CreateElementBitCast(
                GetAddrOfLocalVar(PrivateVD),
                ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
          });
        } else {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          Address OriginalAddr = Address::invalid();
          PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
                                          &OriginalAddr]() -> Address {
            DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                            CapturedStmtInfo->lookup(OrigVD) != nullptr,
                            IRef->getType(), VK_LValue, IRef->getExprLoc());
            OriginalAddr = EmitLValue(&DRE).getAddress();
            return OriginalAddr;
          });
          // Emit reduction copy.
          bool IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
                // Emit private VarDecl with reduction init.
                AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
                auto Addr = Emission.getAllocatedAddress();
                if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                  emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                   OriginalAddr,
                                                   PrivateVD->getType());
                } else
                  EmitAutoVarInit(Emission);
                EmitAutoVarCleanups(Emission);
                return Addr;
              });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
            return GetAddrOfLocalVar(PrivateVD);
          });
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
      ++IRed;
    }
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        D.getSingleClause<OMPNowaitClause>() ||
            isOpenMPParallelDirective(D.getDirectiveKind()) ||
            D.getDirectiveKind() == OMPD_simd,
        D.getDirectiveKind() == OMPD_simd);
  }
}

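// Note: emitReduction() lowers the collected reduction data to the
// __kmpc_reduce/__kmpc_reduce_nowait runtime protocol (choosing the nowait
// form according to the flag computed above); the details live in
// CGOpenMPRuntime.
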
static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (auto *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                           const OMPExecutableDirective &S,
                                           OpenMPDirectiveKind InnermostKind,
                                           const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize the threads and avoid data
      // races on propagation of the master thread's values of threadprivate
      // variables to the local instances in all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

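// For a plain '#pragma omp parallel', emitParallelCall() above typically
// lowers to something like
//   call void __kmpc_fork_call(&loc, <num captured vars>, <outlined fn>, ...)
// with a serialized fallback when an 'if' clause evaluates to false; the
// exact call sequence is produced by CGOpenMPRuntime.
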
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counter values for the current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *U : C->updates())
      EmitIgnoredExpr(U);
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

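// The control flow produced by EmitOMPInnerLoop is the usual while-style
// loop over the privatized iteration variable:
//
//   omp.inner.for.cond:  branch on LoopCond to body or end (possibly via a
//                        cleanup staging block)
//   omp.inner.for.body:  BodyGen output; 'continue' jumps to the inc block
//   omp.inner.for.inc:   IV = IV + 1, PostIncGen, back edge to cond
//   omp.inner.for.end:   fall-through after the loop
//
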
void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return;
  // Emit inits for the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
}

void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

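// EmitAlignmentAssumption() above surfaces the 'aligned' clause to the
// optimizer as an alignment assumption on the pointer value (an
// llvm.assume-based pattern), so that, e.g., 'aligned(p : 32)' lets the
// vectorizer rely on 32-byte alignment of 'p'.
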
      if (!LocalDeclMap.count(PrivateVD)) {
        auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
        EmitAutoVarCleanups(VarEmission);
      }
      DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      (*I)->getType(), VK_LValue, (*I)->getExprLoc());
      return EmitLValue(&DRE).getAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    }
    ++I;
  }
}

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (auto I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Check that the loop is executed at least once.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else
        EmitVarDecl(*PrivateVD);
      ++CurPrivate;
    }
  }
}

static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
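    // Illustrative example: with '#pragma omp simd simdlen(8) safelen(16)'
    // the vectorize width is set to 8, but iterations 16 or more apart may
    // still carry dependences, so the loop is marked parallel only when no
    // safelen clause is present.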
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(false);
  }
}

void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/simdlen.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}

void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED)
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      else {
        DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPLoopScope PreInitScope(CGF, S);
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
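    // E.g. a zero-trip loop such as 'for (int i = 0; i < 0; ++i)'
    // (illustrative) folds the precondition to 'false' and the whole simd
    // region is elided.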
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    CGF.EmitOMPLinearClauseInit(S);
    {
      OMPPrivateScope LoopScope(CGF);
      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
      CGF.EmitOMPLinearClause(S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      bool HasLastprivateClause =
          CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      CGF.EmitOMPSimdFinal(
          S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
      // Emit final copy of the lastprivate variables at the end of loops.
      if (HasLastprivateClause)
        CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
      CGF.EmitOMPReductionClauseFinal(S);
      emitPostUpdateForReductionClause(
          CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    }
    CGF.EmitOMPLinearClauseFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
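  // A sketch of the block structure emitted below (names match the
  // createBasicBlock() labels; the cleanup block appears only when the
  // loop scope requires cleanups):
  //
  //   omp.dispatch.cond --> omp.dispatch.body --> omp.dispatch.inc
  //           ^                                          |
  //           +------------------------------------------+
  //           |
  //           +--> [omp.dispatch.cleanup] --> omp.dispatch.end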
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond());
  } else {
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL,
                                 LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();
  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                   [&S, LoopExit](CodeGenFunction &CGF) {
                     CGF.EmitOMPLoopBody(S, LoopExit);
                     CGF.EmitStopPoint(&S);
                   },
                   [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
                     if (Ordered) {
                       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
                           CGF, Loc, IVSize, IVSigned);
                     }
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}

void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
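  // For instance (illustrative), '#pragma omp for schedule(dynamic, 4)' or a
  // loop with an 'ordered' clause takes the dynamic dispatch path below;
  // plain static non-chunked schedules never reach this function (see the
  // assert that follows).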
  const bool DynamicOrOrdered =
      Ordered || RT.isDynamic(ScheduleKind.Schedule);

  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 /*Chunked=*/Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
    RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
                           IVSigned, Ordered, UBVal, Chunk);
  } else {
    RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                         Ordered, IL, LB, UB, ST, Chunk);
  }

  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB,
                   ST, IL, Chunk);
}

void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind,
    const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {

  auto &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
  // dynamic.
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                              IVSize, IVSigned, /* Ordered = */ false,
                              IL, LB, UB, ST, Chunk);

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false,
                   S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk);
}

void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_distribute_parallel_for,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        OMPCancelStackRAII CancelRegion(CGF, OMPD_distribute_parallel_for,
                                        /*HasCancel=*/false);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_distribute_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_distribute_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_teams_distribute,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_teams_distribute_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_teams_distribute_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_teams_distribute_parallel_for,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams_distribute,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams_distribute_parallel_for,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams_distribute_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams_distribute_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

/// \brief Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace

bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    bool Ordered = false;
    if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S);
      else
        Ordered = true;
    }

    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.
    LValue LB =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    LValue UB =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
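    // Two code paths follow (a summary of the logic below): for a static
    // non-chunked schedule without 'ordered' the runtime hands each thread a
    // single [LB, UB] range and the inner loop is emitted directly;
    // otherwise EmitOMPForOuterLoop re-queries the runtime for each chunk.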
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      if (RT.isStaticNonchunked(ScheduleKind.Schedule,
                                /* Chunked */ Chunk != nullptr) &&
          !Ordered) {
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided
        // into chunks that are approximately equal in size, and at most one
        // chunk is distributed to each thread. Note that the size of the
        // chunks is unspecified in this case.
        RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
                             IVSize, IVSigned, Ordered,
                             IL.getAddress(), LB.getAddress(),
                             UB.getAddress(), ST.getAddress());
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        const bool IsMonotonic =
            Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
            ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
            ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LB.getAddress(), UB.getAddress(), ST.getAddress(),
                            IL.getAddress(), Chunk);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S,
                         [&](CodeGenFunction &CGF) -> llvm::Value * {
                           return CGF.Builder.CreateIsNotNull(
                               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
                         });
      }
      EmitOMPReductionClauseFinal(S);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getLocStart()));
    });
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
  return HasLastprivateClause;
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }

  // Emit an implicit barrier at the end.
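  // Note (explanatory): even with 'nowait', the presence of lastprivates
  // still forces the barrier, so their final values are written back before
  // any thread proceeds.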
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
  return LVal;
}

void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(),
                        /*fpContractable=*/false);
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      //   ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32,
        /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
        UB.getAddress(), ST.getAddress());
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_sections);
  }
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
                                              S.hasCancel());
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination>=<source> expressions)
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for 'single' region along with 'copyprivate' clauses
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid data race on firstprivate
  // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getLocStart(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  Expr *Hint = nullptr;
  if (auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getLocStart(), Hint);
}

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPWorksharingLoop(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitSections(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
}

void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
                                                const RegionCodeGenTy &BodyGen,
                                                const TaskGenTy &TaskGen,
                                                OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  auto *TaskT = std::next(I, 4);
  // Check if the task is final
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
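  // E.g. for '#pragma omp task priority(p)' (illustrative), 'p' is evaluated
  // here and narrowed to a 32-bit integer for the runtime.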
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    auto *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (auto *IRef : C->varlists())
      Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
  auto &&CodeGen = [&Data, CS, &BodyGen, &LastprivateDstsOrigs](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      auto *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
      // Map privates.
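      // The runtime call built below has the shape (illustrative):
      //   CopyFn(PrivatesPtr, &.priv.ptr.addr, &.firstpriv.ptr.addr, ...)
      // after which each temporary holds the address of the corresponding
      // private copy inside the task's privates block.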
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (auto *E : Data.PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.LastprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.EmitRuntimeCall(CopyFn, CallArgs);
      for (auto &&Pair : LastprivateDstsOrigs) {
        auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    (void)Scope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
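  // I.e. the task is tied unless an explicit 'untied' clause
  // ('#pragma omp task untied') is present.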
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                                    IVSize, IVSigned, /* Ordered = */ false,
                                    IL.getAddress(), LB.getAddress(),
                                    UB.getAddress(), ST.getAddress());
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope,
                                   LB.getAddress(), UB.getAddress(),
                                   ST.getAddress(), IL.getAddress(), Chunk);
      }

      // Emit final copy of the lastprivate variables if IsLastIter != 0.
2907 if (HasLastprivateClause) 2908 EmitOMPLastprivateClauseFinal( 2909 S, /*NoFinals=*/false, 2910 Builder.CreateIsNotNull( 2911 EmitLoadOfScalar(IL, S.getLocStart()))); 2912 } 2913 2914 // We're now done with the loop, so jump to the continuation block. 2915 if (ContBlock) { 2916 EmitBranch(ContBlock); 2917 EmitBlock(ContBlock, true); 2918 } 2919 } 2920 } 2921 2922 void CodeGenFunction::EmitOMPDistributeDirective( 2923 const OMPDistributeDirective &S) { 2924 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2925 CGF.EmitOMPDistributeLoop(S); 2926 }; 2927 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2928 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 2929 false); 2930 } 2931 2932 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 2933 const CapturedStmt *S) { 2934 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 2935 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 2936 CGF.CapturedStmtInfo = &CapStmtInfo; 2937 auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); 2938 Fn->addFnAttr(llvm::Attribute::NoInline); 2939 return Fn; 2940 } 2941 2942 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 2943 if (!S.getAssociatedStmt()) { 2944 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) 2945 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); 2946 return; 2947 } 2948 auto *C = S.getSingleClause<OMPSIMDClause>(); 2949 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, 2950 PrePostActionTy &Action) { 2951 if (C) { 2952 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2953 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 2954 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 2955 auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); 2956 CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); 2957 } else { 2958 Action.Enter(CGF); 2959 CGF.EmitStmt( 2960 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2961 } 2962 }; 2963 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2964 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); 2965 } 2966 2967 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 2968 QualType SrcType, QualType DestType, 2969 SourceLocation Loc) { 2970 assert(CGF.hasScalarEvaluationKind(DestType) && 2971 "DestType must have scalar evaluation kind."); 2972 assert(!Val.isAggregate() && "Must be a scalar or complex."); 2973 return Val.isScalar() 2974 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, 2975 Loc) 2976 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 2977 DestType, Loc); 2978 } 2979 2980 static CodeGenFunction::ComplexPairTy 2981 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 2982 QualType DestType, SourceLocation Loc) { 2983 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 2984 "DestType must have complex evaluation kind."); 2985 CodeGenFunction::ComplexPairTy ComplexVal; 2986 if (Val.isScalar()) { 2987 // Convert the input element to the element type of the complex. 
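// E.g. converting a 'double' scalar to '_Complex float' converts the scalar
// to 'float' here and pairs it with a zero imaginary part below (a sketch of
// the scalar-to-complex case).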
2988 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 2989 auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 2990 DestElementType, Loc); 2991 ComplexVal = CodeGenFunction::ComplexPairTy( 2992 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 2993 } else { 2994 assert(Val.isComplex() && "Must be a scalar or complex."); 2995 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 2996 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 2997 ComplexVal.first = CGF.EmitScalarConversion( 2998 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 2999 ComplexVal.second = CGF.EmitScalarConversion( 3000 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 3001 } 3002 return ComplexVal; 3003 } 3004 3005 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 3006 LValue LVal, RValue RVal) { 3007 if (LVal.isGlobalReg()) { 3008 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 3009 } else { 3010 CGF.EmitAtomicStore(RVal, LVal, 3011 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3012 : llvm::AtomicOrdering::Monotonic, 3013 LVal.isVolatile(), /*IsInit=*/false); 3014 } 3015 } 3016 3017 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, 3018 QualType RValTy, SourceLocation Loc) { 3019 switch (getEvaluationKind(LVal.getType())) { 3020 case TEK_Scalar: 3021 EmitStoreThroughLValue(RValue::get(convertToScalarValue( 3022 *this, RVal, RValTy, LVal.getType(), Loc)), 3023 LVal); 3024 break; 3025 case TEK_Complex: 3026 EmitStoreOfComplex( 3027 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, 3028 /*isInit=*/false); 3029 break; 3030 case TEK_Aggregate: 3031 llvm_unreachable("Must be a scalar or complex."); 3032 } 3033 } 3034 3035 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 3036 const Expr *X, const Expr *V, 3037 SourceLocation Loc) { 3038 // v = x; 3039 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 3040 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 3041 LValue XLValue = CGF.EmitLValue(X); 3042 LValue VLValue = CGF.EmitLValue(V); 3043 RValue Res = XLValue.isGlobalReg() 3044 ? CGF.EmitLoadOfLValue(XLValue, Loc) 3045 : CGF.EmitAtomicLoad( 3046 XLValue, Loc, 3047 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3048 : llvm::AtomicOrdering::Monotonic, 3049 XLValue.isVolatile()); 3050 // OpenMP, 2.12.6, atomic Construct 3051 // Any atomic construct with a seq_cst clause forces the atomically 3052 // performed operation to include an implicit flush operation without a 3053 // list. 3054 if (IsSeqCst) 3055 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3056 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); 3057 } 3058 3059 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 3060 const Expr *X, const Expr *E, 3061 SourceLocation Loc) { 3062 // x = expr; 3063 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 3064 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); 3065 // OpenMP, 2.12.6, atomic Construct 3066 // Any atomic construct with a seq_cst clause forces the atomically 3067 // performed operation to include an implicit flush operation without a 3068 // list. 
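// E.g. (a simplified sketch, assuming the libomp runtime entry points):
//   #pragma omp atomic write seq_cst
//   x = expr;
// emits the atomic store above followed by a flush of all memory, which
// emitFlush() lowers to a call to __kmpc_flush.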
3069   if (IsSeqCst)
3070     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3071 }
3072
3073 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
3074                                                 RValue Update,
3075                                                 BinaryOperatorKind BO,
3076                                                 llvm::AtomicOrdering AO,
3077                                                 bool IsXLHSInRHSPart) {
3078   auto &Context = CGF.CGM.getContext();
3079   // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
3080   // for the 'x' expression is simple, and atomics are allowed for the given
3081   // type on the target platform.
3082   if (BO == BO_Comma || !Update.isScalar() ||
3083       !Update.getScalarVal()->getType()->isIntegerTy() ||
3084       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
3085                         (Update.getScalarVal()->getType() !=
3086                          X.getAddress().getElementType())) ||
3087       !X.getAddress().getElementType()->isIntegerTy() ||
3088       !Context.getTargetInfo().hasBuiltinAtomic(
3089           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
3090     return std::make_pair(false, RValue::get(nullptr));
3091
3092   llvm::AtomicRMWInst::BinOp RMWOp;
3093   switch (BO) {
3094   case BO_Add:
3095     RMWOp = llvm::AtomicRMWInst::Add;
3096     break;
3097   case BO_Sub:
3098     if (!IsXLHSInRHSPart)
3099       return std::make_pair(false, RValue::get(nullptr));
3100     RMWOp = llvm::AtomicRMWInst::Sub;
3101     break;
3102   case BO_And:
3103     RMWOp = llvm::AtomicRMWInst::And;
3104     break;
3105   case BO_Or:
3106     RMWOp = llvm::AtomicRMWInst::Or;
3107     break;
3108   case BO_Xor:
3109     RMWOp = llvm::AtomicRMWInst::Xor;
3110     break;
3111   case BO_LT:
3112     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3113                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
3114                                    : llvm::AtomicRMWInst::Max)
3115                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
3116                                    : llvm::AtomicRMWInst::UMax);
3117     break;
3118   case BO_GT:
3119     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3120                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
3121                                    : llvm::AtomicRMWInst::Min)
3122                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
3123                                    : llvm::AtomicRMWInst::UMin);
3124     break;
3125   case BO_Assign:
3126     RMWOp = llvm::AtomicRMWInst::Xchg;
3127     break;
3128   case BO_Mul:
3129   case BO_Div:
3130   case BO_Rem:
3131   case BO_Shl:
3132   case BO_Shr:
3133   case BO_LAnd:
3134   case BO_LOr:
3135     return std::make_pair(false, RValue::get(nullptr));
3136   case BO_PtrMemD:
3137   case BO_PtrMemI:
3138   case BO_LE:
3139   case BO_GE:
3140   case BO_EQ:
3141   case BO_NE:
3142   case BO_AddAssign:
3143   case BO_SubAssign:
3144   case BO_AndAssign:
3145   case BO_OrAssign:
3146   case BO_XorAssign:
3147   case BO_MulAssign:
3148   case BO_DivAssign:
3149   case BO_RemAssign:
3150   case BO_ShlAssign:
3151   case BO_ShrAssign:
3152   case BO_Comma:
3153     llvm_unreachable("Unsupported atomic update operation");
3154   }
3155   auto *UpdateVal = Update.getScalarVal();
3156   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3157     UpdateVal = CGF.Builder.CreateIntCast(
3158         IC, X.getAddress().getElementType(),
3159         X.getType()->hasSignedIntegerRepresentation());
3160   }
3161   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3162   return std::make_pair(true, RValue::get(Res));
3163 }
3164
3165 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3166     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3167     llvm::AtomicOrdering AO, SourceLocation Loc,
3168     const llvm::function_ref<RValue(RValue)> &CommonGen) {
3169   // Update expressions are allowed to have the following forms:
3170   // x binop= expr; -> xrval binop expr;
3171   // x++, ++x -> xrval + 1;
3172   // x--, --x -> xrval - 1;
3173   // x = x binop expr; -> xrval binop expr;
3174   // x = expr Op x; -> expr binop xrval;
3175   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3176   if (!Res.first) {
3177     if (X.isGlobalReg()) {
3178       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3179       // 'xrval'.
3180       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3181     } else {
3182       // Perform compare-and-swap procedure.
3183       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3184     }
3185   }
3186   return Res;
3187 }
3188
3189 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3190                                     const Expr *X, const Expr *E,
3191                                     const Expr *UE, bool IsXLHSInRHSPart,
3192                                     SourceLocation Loc) {
3193   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3194          "Update expr in 'atomic update' must be a binary operator.");
3195   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3196   // Update expressions are allowed to have the following forms:
3197   // x binop= expr; -> xrval binop expr;
3198   // x++, ++x -> xrval + 1;
3199   // x--, --x -> xrval - 1;
3200   // x = x binop expr; -> xrval binop expr;
3201   // x = expr Op x; -> expr binop xrval;
3202   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3203   LValue XLValue = CGF.EmitLValue(X);
3204   RValue ExprRValue = CGF.EmitAnyExpr(E);
3205   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3206                      : llvm::AtomicOrdering::Monotonic;
3207   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3208   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3209   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3210   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3211   auto Gen =
3212       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3213         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3214         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3215         return CGF.EmitAnyExpr(UE);
3216       };
3217   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3218       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3219   // OpenMP, 2.12.6, atomic Construct
3220   // Any atomic construct with a seq_cst clause forces the atomically
3221   // performed operation to include an implicit flush operation without a
3222   // list.
3223   if (IsSeqCst)
3224     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3225 }
3226
3227 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3228                             QualType SourceType, QualType ResType,
3229                             SourceLocation Loc) {
3230   switch (CGF.getEvaluationKind(ResType)) {
3231   case TEK_Scalar:
3232     return RValue::get(
3233         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3234   case TEK_Complex: {
3235     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3236     return RValue::getComplex(Res.first, Res.second);
3237   }
3238   case TEK_Aggregate:
3239     break;
3240   }
3241   llvm_unreachable("Must be a scalar or complex.");
3242 }
3243
3244 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
3245                                      bool IsPostfixUpdate, const Expr *V,
3246                                      const Expr *X, const Expr *E,
3247                                      const Expr *UE, bool IsXLHSInRHSPart,
3248                                      SourceLocation Loc) {
3249   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
3250   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
3251   RValue NewVVal;
3252   LValue VLValue = CGF.EmitLValue(V);
3253   LValue XLValue = CGF.EmitLValue(X);
3254   RValue ExprRValue = CGF.EmitAnyExpr(E);
3255   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3256                      : llvm::AtomicOrdering::Monotonic;
3257   QualType NewVValType;
3258   if (UE) {
3259     // 'x' is updated with some additional value.
3260     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3261            "Update expr in 'atomic capture' must be a binary operator.");
3262     auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3263     // Update expressions are allowed to have the following forms:
3264     // x binop= expr; -> xrval binop expr;
3265     // x++, ++x -> xrval + 1;
3266     // x--, --x -> xrval - 1;
3267     // x = x binop expr; -> xrval binop expr;
3268     // x = expr Op x; -> expr binop xrval;
3269     auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3270     auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3271     auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3272     NewVValType = XRValExpr->getType();
3273     auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3274     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
3275                   IsPostfixUpdate](RValue XRValue) -> RValue {
3276       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3277       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3278       RValue Res = CGF.EmitAnyExpr(UE);
3279       NewVVal = IsPostfixUpdate ? XRValue : Res;
3280       return Res;
3281     };
3282     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3283         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3284     if (Res.first) {
3285       // 'atomicrmw' instruction was generated.
3286       if (IsPostfixUpdate) {
3287         // Use old value from 'atomicrmw'.
3288         NewVVal = Res.second;
3289       } else {
3290         // 'atomicrmw' does not provide new value, so evaluate it using old
3291         // value of 'x'.
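        // E.g. for 'v = ++x;' the atomicrmw returned the old 'x', so the new
        // value for 'v' is recomputed below by re-evaluating the update
        // expression with the old value bound to 'x' (a sketch).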
3292 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3293 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 3294 NewVVal = CGF.EmitAnyExpr(UE); 3295 } 3296 } 3297 } else { 3298 // 'x' is simply rewritten with some 'expr'. 3299 NewVValType = X->getType().getNonReferenceType(); 3300 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 3301 X->getType().getNonReferenceType(), Loc); 3302 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue { 3303 NewVVal = XRValue; 3304 return ExprRValue; 3305 }; 3306 // Try to perform atomicrmw xchg, otherwise simple exchange. 3307 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3308 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 3309 Loc, Gen); 3310 if (Res.first) { 3311 // 'atomicrmw' instruction was generated. 3312 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 3313 } 3314 } 3315 // Emit post-update store to 'v' of old/new 'x' value. 3316 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); 3317 // OpenMP, 2.12.6, atomic Construct 3318 // Any atomic construct with a seq_cst clause forces the atomically 3319 // performed operation to include an implicit flush operation without a 3320 // list. 3321 if (IsSeqCst) 3322 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3323 } 3324 3325 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 3326 bool IsSeqCst, bool IsPostfixUpdate, 3327 const Expr *X, const Expr *V, const Expr *E, 3328 const Expr *UE, bool IsXLHSInRHSPart, 3329 SourceLocation Loc) { 3330 switch (Kind) { 3331 case OMPC_read: 3332 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 3333 break; 3334 case OMPC_write: 3335 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 3336 break; 3337 case OMPC_unknown: 3338 case OMPC_update: 3339 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 3340 break; 3341 case OMPC_capture: 3342 EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 3343 IsXLHSInRHSPart, Loc); 3344 break; 3345 case OMPC_if: 3346 case OMPC_final: 3347 case OMPC_num_threads: 3348 case OMPC_private: 3349 case OMPC_firstprivate: 3350 case OMPC_lastprivate: 3351 case OMPC_reduction: 3352 case OMPC_safelen: 3353 case OMPC_simdlen: 3354 case OMPC_collapse: 3355 case OMPC_default: 3356 case OMPC_seq_cst: 3357 case OMPC_shared: 3358 case OMPC_linear: 3359 case OMPC_aligned: 3360 case OMPC_copyin: 3361 case OMPC_copyprivate: 3362 case OMPC_flush: 3363 case OMPC_proc_bind: 3364 case OMPC_schedule: 3365 case OMPC_ordered: 3366 case OMPC_nowait: 3367 case OMPC_untied: 3368 case OMPC_threadprivate: 3369 case OMPC_depend: 3370 case OMPC_mergeable: 3371 case OMPC_device: 3372 case OMPC_threads: 3373 case OMPC_simd: 3374 case OMPC_map: 3375 case OMPC_num_teams: 3376 case OMPC_thread_limit: 3377 case OMPC_priority: 3378 case OMPC_grainsize: 3379 case OMPC_nogroup: 3380 case OMPC_num_tasks: 3381 case OMPC_hint: 3382 case OMPC_dist_schedule: 3383 case OMPC_defaultmap: 3384 case OMPC_uniform: 3385 case OMPC_to: 3386 case OMPC_from: 3387 case OMPC_use_device_ptr: 3388 case OMPC_is_device_ptr: 3389 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 3390 } 3391 } 3392 3393 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 3394 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); 3395 OpenMPClauseKind Kind = OMPC_unknown; 3396 for (auto *C : S.clauses()) { 3397 // Find first clause (skip seq_cst clause, if it is first). 
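  // E.g. '#pragma omp atomic capture seq_cst' yields Kind == OMPC_capture,
  // while a bare '#pragma omp atomic' leaves Kind == OMPC_unknown, which is
  // lowered as an update below.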
3398     if (C->getClauseKind() != OMPC_seq_cst) {
3399       Kind = C->getClauseKind();
3400       break;
3401     }
3402   }
3403
3404   const auto *CS =
3405       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
3406   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3407     enterFullExpression(EWC);
3408   }
3409   // Processing for statements under 'atomic capture'.
3410   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3411     for (const auto *C : Compound->body()) {
3412       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3413         enterFullExpression(EWC);
3414       }
3415     }
3416   }
3417
3418   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3419                                             PrePostActionTy &) {
3420     CGF.EmitStopPoint(CS);
3421     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3422                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3423                       S.isXLHSInRHSPart(), S.getLocStart());
3424   };
3425   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3426   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3427 }
3428
3429 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
3430                                          const OMPExecutableDirective &S,
3431                                          const RegionCodeGenTy &CodeGen) {
3432   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
3433   CodeGenModule &CGM = CGF.CGM;
3434   const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
3435
3436   llvm::Function *Fn = nullptr;
3437   llvm::Constant *FnID = nullptr;
3438
3439   const Expr *IfCond = nullptr;
3440   // Check for at most one 'if' clause associated with the target region.
3441   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3442     if (C->getNameModifier() == OMPD_unknown ||
3443         C->getNameModifier() == OMPD_target) {
3444       IfCond = C->getCondition();
3445       break;
3446     }
3447   }
3448
3449   // Check if we have any device clause associated with the directive.
3450   const Expr *Device = nullptr;
3451   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3452     Device = C->getDevice();
3453   }
3454
3455   // Check if we have an if clause whose conditional always evaluates to
3456   // false or if we do not have any targets specified. If so, the target
3457   // region is not an offload entry point.
3458   bool IsOffloadEntry = true;
3459   if (IfCond) {
3460     bool Val;
3461     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3462       IsOffloadEntry = false;
3463   }
3464   if (CGM.getLangOpts().OMPTargetTriples.empty())
3465     IsOffloadEntry = false;
3466
3467   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
3468   StringRef ParentName;
3469   // In case we have Ctors/Dtors we use the complete type variant to produce
3470   // the mangling of the device outlined kernel.
3471   if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
3472     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3473   else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
3474     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3475   else
3476     ParentName =
3477         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
3478
3479   // Emit target region as a standalone region.
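  // The overall shape of the lowering is roughly (a simplified sketch):
  //   device side: region body outlined into a kernel mangled via ParentName;
  //   host side:   if (IfCond holds && offloading to Device succeeds)
  //                  launch the kernel with CapturedVars;
  //                else
  //                  run the host fallback version of the region;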
3480 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, 3481 IsOffloadEntry, CodeGen); 3482 OMPLexicalScope Scope(CGF, S); 3483 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3484 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 3485 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, 3486 CapturedVars); 3487 } 3488 3489 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, 3490 PrePostActionTy &Action) { 3491 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 3492 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3493 CGF.EmitOMPPrivateClause(S, PrivateScope); 3494 (void)PrivateScope.Privatize(); 3495 3496 Action.Enter(CGF); 3497 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3498 } 3499 3500 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, 3501 StringRef ParentName, 3502 const OMPTargetDirective &S) { 3503 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3504 emitTargetRegion(CGF, S, Action); 3505 }; 3506 llvm::Function *Fn; 3507 llvm::Constant *Addr; 3508 // Emit target region as a standalone region. 3509 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3510 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3511 assert(Fn && Addr && "Target device function emission failed."); 3512 } 3513 3514 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 3515 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3516 emitTargetRegion(CGF, S, Action); 3517 }; 3518 emitCommonOMPTargetDirective(*this, S, CodeGen); 3519 } 3520 3521 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, 3522 const OMPExecutableDirective &S, 3523 OpenMPDirectiveKind InnermostKind, 3524 const RegionCodeGenTy &CodeGen) { 3525 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); 3526 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( 3527 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 3528 3529 const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>(); 3530 const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>(); 3531 if (NT || TL) { 3532 Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; 3533 Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; 3534 3535 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, 3536 S.getLocStart()); 3537 } 3538 3539 OMPTeamsScope Scope(CGF, S); 3540 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3541 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3542 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, 3543 CapturedVars); 3544 } 3545 3546 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { 3547 // Emit teams region as a standalone region. 
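  // E.g. (a simplified sketch, assuming the libomp runtime entry points):
  //   #pragma omp teams num_teams(4) thread_limit(8)
  // first calls __kmpc_push_num_teams(loc, gtid, 4, 8) and then forks the
  // outlined function via __kmpc_fork_teams with the captured variables.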
3548 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3549 OMPPrivateScope PrivateScope(CGF); 3550 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3551 CGF.EmitOMPPrivateClause(S, PrivateScope); 3552 (void)PrivateScope.Privatize(); 3553 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3554 }; 3555 emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); 3556 } 3557 3558 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, 3559 const OMPTargetTeamsDirective &S) { 3560 auto *CS = S.getCapturedStmt(OMPD_teams); 3561 Action.Enter(CGF); 3562 auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 3563 // TODO: Add support for clauses. 3564 CGF.EmitStmt(CS->getCapturedStmt()); 3565 }; 3566 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); 3567 } 3568 3569 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 3570 CodeGenModule &CGM, StringRef ParentName, 3571 const OMPTargetTeamsDirective &S) { 3572 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3573 emitTargetTeamsRegion(CGF, Action, S); 3574 }; 3575 llvm::Function *Fn; 3576 llvm::Constant *Addr; 3577 // Emit target region as a standalone region. 3578 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3579 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3580 assert(Fn && Addr && "Target device function emission failed."); 3581 } 3582 3583 void CodeGenFunction::EmitOMPTargetTeamsDirective( 3584 const OMPTargetTeamsDirective &S) { 3585 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3586 emitTargetTeamsRegion(CGF, Action, S); 3587 }; 3588 emitCommonOMPTargetDirective(*this, S, CodeGen); 3589 } 3590 3591 void CodeGenFunction::EmitOMPCancellationPointDirective( 3592 const OMPCancellationPointDirective &S) { 3593 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), 3594 S.getCancelRegion()); 3595 } 3596 3597 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 3598 const Expr *IfCond = nullptr; 3599 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3600 if (C->getNameModifier() == OMPD_unknown || 3601 C->getNameModifier() == OMPD_cancel) { 3602 IfCond = C->getCondition(); 3603 break; 3604 } 3605 } 3606 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, 3607 S.getCancelRegion()); 3608 } 3609 3610 CodeGenFunction::JumpDest 3611 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 3612 if (Kind == OMPD_parallel || Kind == OMPD_task || 3613 Kind == OMPD_target_parallel) 3614 return ReturnBlock; 3615 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 3616 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || 3617 Kind == OMPD_distribute_parallel_for || 3618 Kind == OMPD_target_parallel_for); 3619 return OMPCancelStack.getExitBlock(); 3620 } 3621 3622 void CodeGenFunction::EmitOMPUseDevicePtrClause( 3623 const OMPClause &NC, OMPPrivateScope &PrivateScope, 3624 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { 3625 const auto &C = cast<OMPUseDevicePtrClause>(NC); 3626 auto OrigVarIt = C.varlist_begin(); 3627 auto InitIt = C.inits().begin(); 3628 for (auto PvtVarIt : C.private_copies()) { 3629 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl()); 3630 auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl()); 3631 auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl()); 3632 3633 // In order to identify the right 
// initializer we need to match the
3634     // declaration used by the mapping logic. In some cases we may get
3635     // an OMPCapturedExprDecl that refers to the original declaration.
3636     const ValueDecl *MatchingVD = OrigVD;
3637     if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
3638       // OMPCapturedExprDecls are used to privatize fields of the current
3639       // structure.
3640       auto *ME = cast<MemberExpr>(OED->getInit());
3641       assert(isa<CXXThisExpr>(ME->getBase()) &&
3642              "Base should be the current struct!");
3643       MatchingVD = ME->getMemberDecl();
3644     }
3645
3646     // If we don't have information about the current list item, move on to
3647     // the next one.
3648     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
3649     if (InitAddrIt == CaptureDeviceAddrMap.end())
3650       continue;
3651
3652     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
3653       // Initialize the temporary initialization variable with the address we
3654       // get from the runtime library. We have to cast the source address
3655       // because it is always a void *. References are materialized in the
3656       // privatization scope, so the initialization here disregards the fact
3657       // the original variable is a reference.
3658       QualType AddrQTy =
3659           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
3660       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
3661       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
3662       setAddrOfLocalVar(InitVD, InitAddr);
3663
3664       // Emit the private declaration; it will be initialized by the
3665       // declaration we just added to the local declarations map.
3666       EmitDecl(*PvtVD);
3667
3668       // The initialization variable reached its purpose in the emission
3669       // of the previous declaration, so we don't need it anymore.
3670       LocalDeclMap.erase(InitVD);
3671
3672       // Return the address of the private variable.
3673       return GetAddrOfLocalVar(PvtVD);
3674     });
3675     assert(IsRegistered && "firstprivate var already registered as private");
3676     // Silence the warning about unused variable.
3677     (void)IsRegistered;
3678
3679     ++OrigVarIt;
3680     ++InitIt;
3681   }
3682 }
3683
3684 // Generate the instructions for '#pragma omp target data' directive.
3685 void CodeGenFunction::EmitOMPTargetDataDirective(
3686     const OMPTargetDataDirective &S) {
3687   CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
3688
3689   // Create a pre/post action to signal the privatization of the device pointer.
3690   // This action can be replaced by the OpenMP runtime code generation to
3691   // deactivate privatization.
3692   bool PrivatizeDevicePointers = false;
3693   class DevicePointerPrivActionTy : public PrePostActionTy {
3694     bool &PrivatizeDevicePointers;
3695
3696   public:
3697     explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
3698         : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
3699     void Enter(CodeGenFunction &CGF) override {
3700       PrivatizeDevicePointers = true;
3701     }
3702   };
3703   DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
3704
3705   auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
3706       CodeGenFunction &CGF, PrePostActionTy &Action) {
3707     auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3708       CGF.EmitStmt(
3709           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3710     };
3711
3712     // Codegen that selects whether to generate the privatization code or not.
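    // E.g. (a sketch): given
    //   #pragma omp target data map(to: p[0:N]) use_device_ptr(p)
    // the branch below privatizes 'p' so the region body sees the device
    // address recorded in Info.CaptureDeviceAddrMap instead of the host
    // pointer.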
3713     auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
3714                           &InnermostCodeGen](CodeGenFunction &CGF,
3715                                              PrePostActionTy &Action) {
3716       RegionCodeGenTy RCG(InnermostCodeGen);
3717       PrivatizeDevicePointers = false;
3718
3719       // Call the pre-action to change the status of PrivatizeDevicePointers if
3720       // needed.
3721       Action.Enter(CGF);
3722
3723       if (PrivatizeDevicePointers) {
3724         OMPPrivateScope PrivateScope(CGF);
3725         // Emit all instances of the use_device_ptr clause.
3726         for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
3727           CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
3728                                         Info.CaptureDeviceAddrMap);
3729         (void)PrivateScope.Privatize();
3730         RCG(CGF);
3731       } else
3732         RCG(CGF);
3733     };
3734
3735     // Forward the provided action to the privatization codegen.
3736     RegionCodeGenTy PrivRCG(PrivCodeGen);
3737     PrivRCG.setAction(Action);
3738
3739     // Although the body of the region is emitted as an inlined directive, we
3740     // don't use an inlined scope: changes to references inside the region are
3741     // expected to be visible outside, so we do not privatize them.
3742     OMPLexicalScope Scope(CGF, S);
3743     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
3744                                                     PrivRCG);
3745   };
3746
3747   RegionCodeGenTy RCG(CodeGen);
3748
3749   // If we don't have target devices, don't bother emitting the data mapping
3750   // code.
3751   if (CGM.getLangOpts().OMPTargetTriples.empty()) {
3752     RCG(*this);
3753     return;
3754   }
3755
3756   // Check if we have any if clause associated with the directive.
3757   const Expr *IfCond = nullptr;
3758   if (auto *C = S.getSingleClause<OMPIfClause>())
3759     IfCond = C->getCondition();
3760
3761   // Check if we have any device clause associated with the directive.
3762   const Expr *Device = nullptr;
3763   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3764     Device = C->getDevice();
3765
3766   // Set the action to signal privatization of device pointers.
3767   RCG.setAction(PrivAction);
3768
3769   // Emit region code.
3770   CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
3771                                              Info);
3772 }
3773
3774 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
3775     const OMPTargetEnterDataDirective &S) {
3776   // If we don't have target devices, don't bother emitting the data mapping
3777   // code.
3778   if (CGM.getLangOpts().OMPTargetTriples.empty())
3779     return;
3780
3781   // Check if we have any if clause associated with the directive.
3782   const Expr *IfCond = nullptr;
3783   if (auto *C = S.getSingleClause<OMPIfClause>())
3784     IfCond = C->getCondition();
3785
3786   // Check if we have any device clause associated with the directive.
3787   const Expr *Device = nullptr;
3788   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3789     Device = C->getDevice();
3790
3791   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
3792 }
3793
3794 void CodeGenFunction::EmitOMPTargetExitDataDirective(
3795     const OMPTargetExitDataDirective &S) {
3796   // If we don't have target devices, don't bother emitting the data mapping
3797   // code.
3798   if (CGM.getLangOpts().OMPTargetTriples.empty())
3799     return;
3800
3801   // Check if we have any if clause associated with the directive.
3802   const Expr *IfCond = nullptr;
3803   if (auto *C = S.getSingleClause<OMPIfClause>())
3804     IfCond = C->getCondition();
3805
3806   // Check if we have any device clause associated with the directive.
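  // E.g. '#pragma omp target exit data if(n > 1) device(1) map(from: a)'
  // provides both expressions; either clause may be absent.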
3807   const Expr *Device = nullptr;
3808   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3809     Device = C->getDevice();
3810
3811   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
3812 }
3813
3814 static void emitTargetParallelRegion(CodeGenFunction &CGF,
3815                                      const OMPTargetParallelDirective &S,
3816                                      PrePostActionTy &Action) {
3817   // Get the captured statement associated with the 'parallel' region.
3818   auto *CS = S.getCapturedStmt(OMPD_parallel);
3819   Action.Enter(CGF);
3820   auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
3821     // TODO: Add support for clauses.
3822     CGF.EmitStmt(CS->getCapturedStmt());
3823   };
3824   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen);
3825 }
3826
3827 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
3828     CodeGenModule &CGM, StringRef ParentName,
3829     const OMPTargetParallelDirective &S) {
3830   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3831     emitTargetParallelRegion(CGF, S, Action);
3832   };
3833   llvm::Function *Fn;
3834   llvm::Constant *Addr;
3835   // Emit target region as a standalone region.
3836   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3837       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3838   assert(Fn && Addr && "Target device function emission failed.");
3839 }
3840
3841 void CodeGenFunction::EmitOMPTargetParallelDirective(
3842     const OMPTargetParallelDirective &S) {
3843   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3844     emitTargetParallelRegion(CGF, S, Action);
3845   };
3846   emitCommonOMPTargetDirective(*this, S, CodeGen);
3847 }
3848
3849 void CodeGenFunction::EmitOMPTargetParallelForDirective(
3850     const OMPTargetParallelForDirective &S) {
3851   // TODO: codegen for target parallel for.
3852 }
3853
3854 /// Map the given helper variable to its corresponding captured parameter.
3855 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
3856                      const ImplicitParamDecl *PVD,
3857                      CodeGenFunction::OMPPrivateScope &Privates) {
3858   auto *VDecl = cast<VarDecl>(Helper->getDecl());
3859   Privates.addPrivate(
3860       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
3861 }
3862
3863 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
3864   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
3865   // Emit outlined function for task construct.
3866   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3867   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
3868   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
3869   const Expr *IfCond = nullptr;
3870   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3871     if (C->getNameModifier() == OMPD_unknown ||
3872         C->getNameModifier() == OMPD_taskloop) {
3873       IfCond = C->getCondition();
3874       break;
3875     }
3876   }
3877
3878   OMPTaskDataTy Data;
3879   // Check if taskloop must be emitted without taskgroup.
3880   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
3881   // TODO: Check if we should emit tied or untied task.
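  // For now the task is always emitted as tied: in OpenMP terms, a suspended
  // tied task may only be resumed by the thread that started it, while an
  // 'untied' task may be resumed by any thread in the team.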
3882   Data.Tied = true;
3883   // Set scheduling for taskloop.
3884   if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
3885     // grainsize clause
3886     Data.Schedule.setInt(/*IntVal=*/false);
3887     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
3888   } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
3889     // num_tasks clause
3890     Data.Schedule.setInt(/*IntVal=*/true);
3891     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
3892   }
3893
3894   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
3895     // if (PreCond) {
3896     //   for (IV in 0..LastIteration) BODY;
3897     //   <Final counter/linear vars updates>;
3898     // }
3899     //
3900
3901     // Emit: if (PreCond) - begin.
3902     // If the condition constant folds and can be elided, avoid emitting the
3903     // whole loop.
3904     bool CondConstant;
3905     llvm::BasicBlock *ContBlock = nullptr;
3906     OMPLoopScope PreInitScope(CGF, S);
3907     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3908       if (!CondConstant)
3909         return;
3910     } else {
3911       auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
3912       ContBlock = CGF.createBasicBlock("taskloop.if.end");
3913       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
3914                   CGF.getProfileCount(&S));
3915       CGF.EmitBlock(ThenBlock);
3916       CGF.incrementProfileCounter(&S);
3917     }
3918
3919     if (isOpenMPSimdDirective(S.getDirectiveKind()))
3920       CGF.EmitOMPSimdInit(S);
3921
3922     OMPPrivateScope LoopScope(CGF);
3923     // Emit helper vars inits.
3924     enum { LowerBound = 5, UpperBound, Stride, LastIter };
3925     auto *I = CS->getCapturedDecl()->param_begin();
3926     auto *LBP = std::next(I, LowerBound);
3927     auto *UBP = std::next(I, UpperBound);
3928     auto *STP = std::next(I, Stride);
3929     auto *LIP = std::next(I, LastIter);
3930     mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
3931              LoopScope);
3932     mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
3933              LoopScope);
3934     mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
3935     mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
3936              LoopScope);
3937     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
3938     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
3939     (void)LoopScope.Privatize();
3940     // Emit the loop iteration variable.
3941     const Expr *IVExpr = S.getIterationVariable();
3942     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
3943     CGF.EmitVarDecl(*IVDecl);
3944     CGF.EmitIgnoredExpr(S.getInit());
3945
3946     // Emit the iterations count variable.
3947     // If it is not a variable, Sema decided to calculate iterations count on
3948     // each iteration (e.g., it is foldable into a constant).
3949     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3950       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3951       // Emit calculation of the iterations count.
3952       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
3953     }
3954
3955     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
3956                          S.getInc(),
3957                          [&S](CodeGenFunction &CGF) {
3958                            CGF.EmitOMPLoopBody(S, JumpDest());
3959                            CGF.EmitStopPoint(&S);
3960                          },
3961                          [](CodeGenFunction &) {});
3962     // Emit: if (PreCond) - end.
3963     if (ContBlock) {
3964       CGF.EmitBranch(ContBlock);
3965       CGF.EmitBlock(ContBlock, true);
3966     }
3967     // Emit final copy of the lastprivate variables if IsLastIter != 0.
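    // The last-iteration flag arrives through the captured LIP parameter; a
    // nonzero value means this task body ran the sequentially last iteration
    // (a simplified view of the load below).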
3968 if (HasLastprivateClause) { 3969 CGF.EmitOMPLastprivateClauseFinal( 3970 S, isOpenMPSimdDirective(S.getDirectiveKind()), 3971 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( 3972 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 3973 (*LIP)->getType(), S.getLocStart()))); 3974 } 3975 }; 3976 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 3977 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 3978 const OMPTaskDataTy &Data) { 3979 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) { 3980 OMPLoopScope PreInitScope(CGF, S); 3981 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S, 3982 OutlinedFn, SharedsTy, 3983 CapturedStruct, IfCond, Data); 3984 }; 3985 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, 3986 CodeGen); 3987 }; 3988 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 3989 } 3990 3991 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 3992 EmitOMPTaskLoopBasedDirective(S); 3993 } 3994 3995 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 3996 const OMPTaskLoopSimdDirective &S) { 3997 EmitOMPTaskLoopBasedDirective(S); 3998 } 3999 4000 // Generate the instructions for '#pragma omp target update' directive. 4001 void CodeGenFunction::EmitOMPTargetUpdateDirective( 4002 const OMPTargetUpdateDirective &S) { 4003 // If we don't have target devices, don't bother emitting the data mapping 4004 // code. 4005 if (CGM.getLangOpts().OMPTargetTriples.empty()) 4006 return; 4007 4008 // Check if we have any if clause associated with the directive. 4009 const Expr *IfCond = nullptr; 4010 if (auto *C = S.getSingleClause<OMPIfClause>()) 4011 IfCond = C->getCondition(); 4012 4013 // Check if we have any device clause associated with the directive. 4014 const Expr *Device = nullptr; 4015 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 4016 Device = C->getDevice(); 4017 4018 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 4019 } 4020