1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit OpenMP nodes as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCleanup.h" 15 #include "CGOpenMPRuntime.h" 16 #include "CodeGenFunction.h" 17 #include "CodeGenModule.h" 18 #include "TargetInfo.h" 19 #include "clang/AST/Stmt.h" 20 #include "clang/AST/StmtOpenMP.h" 21 #include "clang/AST/DeclOpenMP.h" 22 #include "llvm/IR/CallSite.h" 23 using namespace clang; 24 using namespace CodeGen; 25 26 namespace { 27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen 28 /// for captured expressions. 29 class OMPLexicalScope : public CodeGenFunction::LexicalScope { 30 void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 31 for (const auto *C : S.clauses()) { 32 if (auto *CPI = OMPClauseWithPreInit::get(C)) { 33 if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { 34 for (const auto *I : PreInit->decls()) { 35 if (!I->hasAttr<OMPCaptureNoInitAttr>()) 36 CGF.EmitVarDecl(cast<VarDecl>(*I)); 37 else { 38 CodeGenFunction::AutoVarEmission Emission = 39 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 40 CGF.EmitAutoVarCleanups(Emission); 41 } 42 } 43 } 44 } 45 } 46 } 47 CodeGenFunction::OMPPrivateScope InlinedShareds; 48 49 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { 50 return CGF.LambdaCaptureFields.lookup(VD) || 51 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || 52 (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl)); 53 } 54 55 public: 56 OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S, 57 bool AsInlined = false, bool EmitPreInitStmt = true) 58 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), 59 InlinedShareds(CGF) { 60 if (EmitPreInitStmt) 61 emitPreInitStmt(CGF, S); 62 if (AsInlined) { 63 if (S.hasAssociatedStmt()) { 64 auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); 65 for (auto &C : CS->captures()) { 66 if (C.capturesVariable() || C.capturesVariableByCopy()) { 67 auto *VD = C.getCapturedVar(); 68 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 69 isCapturedVar(CGF, VD) || 70 (CGF.CapturedStmtInfo && 71 InlinedShareds.isGlobalVarCaptured(VD)), 72 VD->getType().getNonReferenceType(), VK_LValue, 73 SourceLocation()); 74 InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { 75 return CGF.EmitLValue(&DRE).getAddress(); 76 }); 77 } 78 } 79 (void)InlinedShareds.Privatize(); 80 } 81 } 82 } 83 }; 84 85 /// Lexical scope for OpenMP parallel construct, that handles correct codegen 86 /// for captured expressions. 87 class OMPParallelScope final : public OMPLexicalScope { 88 bool EmitPreInitStmt(const OMPExecutableDirective &S) { 89 OpenMPDirectiveKind Kind = S.getDirectiveKind(); 90 return !isOpenMPTargetExecutionDirective(Kind) && 91 isOpenMPParallelDirective(Kind); 92 } 93 94 public: 95 OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) 96 : OMPLexicalScope(CGF, S, 97 /*AsInlined=*/false, 98 /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {} 99 }; 100 101 /// Lexical scope for OpenMP teams construct, that handles correct codegen 102 /// for captured expressions. 103 class OMPTeamsScope final : public OMPLexicalScope { 104 bool EmitPreInitStmt(const OMPExecutableDirective &S) { 105 OpenMPDirectiveKind Kind = S.getDirectiveKind(); 106 return !isOpenMPTargetExecutionDirective(Kind) && 107 isOpenMPTeamsDirective(Kind); 108 } 109 110 public: 111 OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) 112 : OMPLexicalScope(CGF, S, 113 /*AsInlined=*/false, 114 /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {} 115 }; 116 117 /// Private scope for OpenMP loop-based directives, that supports capturing 118 /// of used expression from loop statement. 119 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { 120 void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { 121 if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) { 122 if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) { 123 for (const auto *I : PreInits->decls()) 124 CGF.EmitVarDecl(cast<VarDecl>(*I)); 125 } 126 } 127 } 128 129 public: 130 OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S) 131 : CodeGenFunction::RunCleanupsScope(CGF) { 132 emitPreInitStmt(CGF, S); 133 } 134 }; 135 136 } // namespace 137 138 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { 139 auto &C = getContext(); 140 llvm::Value *Size = nullptr; 141 auto SizeInChars = C.getTypeSizeInChars(Ty); 142 if (SizeInChars.isZero()) { 143 // getTypeSizeInChars() returns 0 for a VLA. 144 while (auto *VAT = C.getAsVariableArrayType(Ty)) { 145 llvm::Value *ArraySize; 146 std::tie(ArraySize, Ty) = getVLASize(VAT); 147 Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize; 148 } 149 SizeInChars = C.getTypeSizeInChars(Ty); 150 if (SizeInChars.isZero()) 151 return llvm::ConstantInt::get(SizeTy, /*V=*/0); 152 Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); 153 } else 154 Size = CGM.getSize(SizeInChars); 155 return Size; 156 } 157 158 void CodeGenFunction::GenerateOpenMPCapturedVars( 159 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { 160 const RecordDecl *RD = S.getCapturedRecordDecl(); 161 auto CurField = RD->field_begin(); 162 auto CurCap = S.captures().begin(); 163 for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), 164 E = S.capture_init_end(); 165 I != E; ++I, ++CurField, ++CurCap) { 166 if (CurField->hasCapturedVLAType()) { 167 auto VAT = CurField->getCapturedVLAType(); 168 auto *Val = VLASizeMap[VAT->getSizeExpr()]; 169 CapturedVars.push_back(Val); 170 } else if (CurCap->capturesThis()) 171 CapturedVars.push_back(CXXThisValue); 172 else if (CurCap->capturesVariableByCopy()) { 173 llvm::Value *CV = 174 EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal(); 175 176 // If the field is not a pointer, we need to save the actual value 177 // and load it as a void pointer. 178 if (!CurField->getType()->isAnyPointerType()) { 179 auto &Ctx = getContext(); 180 auto DstAddr = CreateMemTemp( 181 Ctx.getUIntPtrType(), 182 Twine(CurCap->getCapturedVar()->getName()) + ".casted"); 183 LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); 184 185 auto *SrcAddrVal = EmitScalarConversion( 186 DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), 187 Ctx.getPointerType(CurField->getType()), SourceLocation()); 188 LValue SrcLV = 189 MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType()); 190 191 // Store the value using the source type pointer. 192 EmitStoreThroughLValue(RValue::get(CV), SrcLV); 193 194 // Load the value using the destination type pointer. 195 CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal(); 196 } 197 CapturedVars.push_back(CV); 198 } else { 199 assert(CurCap->capturesVariable() && "Expected capture by reference."); 200 CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer()); 201 } 202 } 203 } 204 205 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType, 206 StringRef Name, LValue AddrLV, 207 bool isReferenceType = false) { 208 ASTContext &Ctx = CGF.getContext(); 209 210 auto *CastedPtr = CGF.EmitScalarConversion( 211 AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(), 212 Ctx.getPointerType(DstType), SourceLocation()); 213 auto TmpAddr = 214 CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) 215 .getAddress(); 216 217 // If we are dealing with references we need to return the address of the 218 // reference instead of the reference of the value. 219 if (isReferenceType) { 220 QualType RefType = Ctx.getLValueReferenceType(DstType); 221 auto *RefVal = TmpAddr.getPointer(); 222 TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref"); 223 auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType); 224 CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true); 225 } 226 227 return TmpAddr; 228 } 229 230 llvm::Function * 231 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { 232 assert( 233 CapturedStmtInfo && 234 "CapturedStmtInfo should be set when generating the captured function"); 235 const CapturedDecl *CD = S.getCapturedDecl(); 236 const RecordDecl *RD = S.getCapturedRecordDecl(); 237 assert(CD->hasBody() && "missing CapturedDecl body"); 238 239 // Build the argument list. 240 ASTContext &Ctx = CGM.getContext(); 241 FunctionArgList Args; 242 Args.append(CD->param_begin(), 243 std::next(CD->param_begin(), CD->getContextParamPosition())); 244 auto I = S.captures().begin(); 245 for (auto *FD : RD->fields()) { 246 QualType ArgType = FD->getType(); 247 IdentifierInfo *II = nullptr; 248 VarDecl *CapVar = nullptr; 249 250 // If this is a capture by copy and the type is not a pointer, the outlined 251 // function argument type should be uintptr and the value properly casted to 252 // uintptr. This is necessary given that the runtime library is only able to 253 // deal with pointers. We can pass in the same way the VLA type sizes to the 254 // outlined function. 255 if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || 256 I->capturesVariableArrayType()) 257 ArgType = Ctx.getUIntPtrType(); 258 259 if (I->capturesVariable() || I->capturesVariableByCopy()) { 260 CapVar = I->getCapturedVar(); 261 II = CapVar->getIdentifier(); 262 } else if (I->capturesThis()) 263 II = &getContext().Idents.get("this"); 264 else { 265 assert(I->capturesVariableArrayType()); 266 II = &getContext().Idents.get("vla"); 267 } 268 if (ArgType->isVariablyModifiedType()) { 269 bool IsReference = ArgType->isLValueReferenceType(); 270 ArgType = 271 getContext().getCanonicalParamType(ArgType.getNonReferenceType()); 272 if (IsReference && !ArgType->isPointerType()) { 273 ArgType = getContext().getLValueReferenceType( 274 ArgType, /*SpelledAsLValue=*/false); 275 } 276 } 277 Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr, 278 FD->getLocation(), II, ArgType)); 279 ++I; 280 } 281 Args.append( 282 std::next(CD->param_begin(), CD->getContextParamPosition() + 1), 283 CD->param_end()); 284 285 // Create the function declaration. 286 FunctionType::ExtInfo ExtInfo; 287 const CGFunctionInfo &FuncInfo = 288 CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); 289 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); 290 291 llvm::Function *F = llvm::Function::Create( 292 FuncLLVMTy, llvm::GlobalValue::InternalLinkage, 293 CapturedStmtInfo->getHelperName(), &CGM.getModule()); 294 CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); 295 if (CD->isNothrow()) 296 F->addFnAttr(llvm::Attribute::NoUnwind); 297 298 // Generate the function. 299 StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), 300 CD->getBody()->getLocStart()); 301 unsigned Cnt = CD->getContextParamPosition(); 302 I = S.captures().begin(); 303 for (auto *FD : RD->fields()) { 304 // If we are capturing a pointer by copy we don't need to do anything, just 305 // use the value that we get from the arguments. 306 if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { 307 const VarDecl *CurVD = I->getCapturedVar(); 308 Address LocalAddr = GetAddrOfLocalVar(Args[Cnt]); 309 // If the variable is a reference we need to materialize it here. 310 if (CurVD->getType()->isReferenceType()) { 311 Address RefAddr = CreateMemTemp(CurVD->getType(), getPointerAlign(), 312 ".materialized_ref"); 313 EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, /*Volatile=*/false, 314 CurVD->getType()); 315 LocalAddr = RefAddr; 316 } 317 setAddrOfLocalVar(CurVD, LocalAddr); 318 ++Cnt; 319 ++I; 320 continue; 321 } 322 323 LValue ArgLVal = 324 MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(), 325 AlignmentSource::Decl); 326 if (FD->hasCapturedVLAType()) { 327 LValue CastedArgLVal = 328 MakeAddrLValue(castValueFromUintptr(*this, FD->getType(), 329 Args[Cnt]->getName(), ArgLVal), 330 FD->getType(), AlignmentSource::Decl); 331 auto *ExprArg = 332 EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal(); 333 auto VAT = FD->getCapturedVLAType(); 334 VLASizeMap[VAT->getSizeExpr()] = ExprArg; 335 } else if (I->capturesVariable()) { 336 auto *Var = I->getCapturedVar(); 337 QualType VarTy = Var->getType(); 338 Address ArgAddr = ArgLVal.getAddress(); 339 if (!VarTy->isReferenceType()) { 340 if (ArgLVal.getType()->isLValueReferenceType()) { 341 ArgAddr = EmitLoadOfReference( 342 ArgAddr, ArgLVal.getType()->castAs<ReferenceType>()); 343 } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { 344 assert(ArgLVal.getType()->isPointerType()); 345 ArgAddr = EmitLoadOfPointer( 346 ArgAddr, ArgLVal.getType()->castAs<PointerType>()); 347 } 348 } 349 setAddrOfLocalVar( 350 Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var))); 351 } else if (I->capturesVariableByCopy()) { 352 assert(!FD->getType()->isAnyPointerType() && 353 "Not expecting a captured pointer."); 354 auto *Var = I->getCapturedVar(); 355 QualType VarTy = Var->getType(); 356 setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(), 357 Args[Cnt]->getName(), ArgLVal, 358 VarTy->isReferenceType())); 359 } else { 360 // If 'this' is captured, load it into CXXThisValue. 361 assert(I->capturesThis()); 362 CXXThisValue = 363 EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal(); 364 } 365 ++Cnt; 366 ++I; 367 } 368 369 PGO.assignRegionCounters(GlobalDecl(CD), F); 370 CapturedStmtInfo->EmitBody(*this, CD->getBody()); 371 FinishFunction(CD->getBodyRBrace()); 372 373 return F; 374 } 375 376 //===----------------------------------------------------------------------===// 377 // OpenMP Directive Emission 378 //===----------------------------------------------------------------------===// 379 void CodeGenFunction::EmitOMPAggregateAssign( 380 Address DestAddr, Address SrcAddr, QualType OriginalType, 381 const llvm::function_ref<void(Address, Address)> &CopyGen) { 382 // Perform element-by-element initialization. 383 QualType ElementTy; 384 385 // Drill down to the base element type on both arrays. 386 auto ArrayTy = OriginalType->getAsArrayTypeUnsafe(); 387 auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr); 388 SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 389 390 auto SrcBegin = SrcAddr.getPointer(); 391 auto DestBegin = DestAddr.getPointer(); 392 // Cast from pointer to array type to pointer to single element. 393 auto DestEnd = Builder.CreateGEP(DestBegin, NumElements); 394 // The basic structure here is a while-do loop. 395 auto BodyBB = createBasicBlock("omp.arraycpy.body"); 396 auto DoneBB = createBasicBlock("omp.arraycpy.done"); 397 auto IsEmpty = 398 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); 399 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 400 401 // Enter the loop body, making that address the current address. 402 auto EntryBB = Builder.GetInsertBlock(); 403 EmitBlock(BodyBB); 404 405 CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy); 406 407 llvm::PHINode *SrcElementPHI = 408 Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 409 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 410 Address SrcElementCurrent = 411 Address(SrcElementPHI, 412 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 413 414 llvm::PHINode *DestElementPHI = 415 Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 416 DestElementPHI->addIncoming(DestBegin, EntryBB); 417 Address DestElementCurrent = 418 Address(DestElementPHI, 419 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 420 421 // Emit copy. 422 CopyGen(DestElementCurrent, SrcElementCurrent); 423 424 // Shift the address forward by one element. 425 auto DestElementNext = Builder.CreateConstGEP1_32( 426 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 427 auto SrcElementNext = Builder.CreateConstGEP1_32( 428 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 429 // Check whether we've reached the end. 430 auto Done = 431 Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 432 Builder.CreateCondBr(Done, DoneBB, BodyBB); 433 DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock()); 434 SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock()); 435 436 // Done. 437 EmitBlock(DoneBB, /*IsFinished=*/true); 438 } 439 440 /// Check if the combiner is a call to UDR combiner and if it is so return the 441 /// UDR decl used for reduction. 442 static const OMPDeclareReductionDecl * 443 getReductionInit(const Expr *ReductionOp) { 444 if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) 445 if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 446 if (auto *DRE = 447 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 448 if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 449 return DRD; 450 return nullptr; 451 } 452 453 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 454 const OMPDeclareReductionDecl *DRD, 455 const Expr *InitOp, 456 Address Private, Address Original, 457 QualType Ty) { 458 if (DRD->getInitializer()) { 459 std::pair<llvm::Function *, llvm::Function *> Reduction = 460 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 461 auto *CE = cast<CallExpr>(InitOp); 462 auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 463 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 464 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 465 auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 466 auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 467 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 468 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 469 [=]() -> Address { return Private; }); 470 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 471 [=]() -> Address { return Original; }); 472 (void)PrivateScope.Privatize(); 473 RValue Func = RValue::get(Reduction.second); 474 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 475 CGF.EmitIgnoredExpr(InitOp); 476 } else { 477 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 478 auto *GV = new llvm::GlobalVariable( 479 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 480 llvm::GlobalValue::PrivateLinkage, Init, ".init"); 481 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 482 RValue InitRVal; 483 switch (CGF.getEvaluationKind(Ty)) { 484 case TEK_Scalar: 485 InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation()); 486 break; 487 case TEK_Complex: 488 InitRVal = 489 RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation())); 490 break; 491 case TEK_Aggregate: 492 InitRVal = RValue::getAggregate(LV.getAddress()); 493 break; 494 } 495 OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue); 496 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 497 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 498 /*IsInitializer=*/false); 499 } 500 } 501 502 /// \brief Emit initialization of arrays of complex types. 503 /// \param DestAddr Address of the array. 504 /// \param Type Type of array. 505 /// \param Init Initial expression of array. 506 /// \param SrcAddr Address of the original array. 507 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 508 QualType Type, const Expr *Init, 509 Address SrcAddr = Address::invalid()) { 510 auto *DRD = getReductionInit(Init); 511 // Perform element-by-element initialization. 512 QualType ElementTy; 513 514 // Drill down to the base element type on both arrays. 515 auto ArrayTy = Type->getAsArrayTypeUnsafe(); 516 auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 517 DestAddr = 518 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 519 if (DRD) 520 SrcAddr = 521 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 522 523 llvm::Value *SrcBegin = nullptr; 524 if (DRD) 525 SrcBegin = SrcAddr.getPointer(); 526 auto DestBegin = DestAddr.getPointer(); 527 // Cast from pointer to array type to pointer to single element. 528 auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 529 // The basic structure here is a while-do loop. 530 auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 531 auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 532 auto IsEmpty = 533 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 534 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 535 536 // Enter the loop body, making that address the current address. 537 auto EntryBB = CGF.Builder.GetInsertBlock(); 538 CGF.EmitBlock(BodyBB); 539 540 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 541 542 llvm::PHINode *SrcElementPHI = nullptr; 543 Address SrcElementCurrent = Address::invalid(); 544 if (DRD) { 545 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 546 "omp.arraycpy.srcElementPast"); 547 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 548 SrcElementCurrent = 549 Address(SrcElementPHI, 550 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 551 } 552 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 553 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 554 DestElementPHI->addIncoming(DestBegin, EntryBB); 555 Address DestElementCurrent = 556 Address(DestElementPHI, 557 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 558 559 // Emit copy. 560 { 561 CodeGenFunction::RunCleanupsScope InitScope(CGF); 562 if (DRD && (DRD->getInitializer() || !Init)) { 563 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 564 SrcElementCurrent, ElementTy); 565 } else 566 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 567 /*IsInitializer=*/false); 568 } 569 570 if (DRD) { 571 // Shift the address forward by one element. 572 auto SrcElementNext = CGF.Builder.CreateConstGEP1_32( 573 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 574 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 575 } 576 577 // Shift the address forward by one element. 578 auto DestElementNext = CGF.Builder.CreateConstGEP1_32( 579 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 580 // Check whether we've reached the end. 581 auto Done = 582 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 583 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 584 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 585 586 // Done. 587 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 588 } 589 590 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, 591 Address SrcAddr, const VarDecl *DestVD, 592 const VarDecl *SrcVD, const Expr *Copy) { 593 if (OriginalType->isArrayType()) { 594 auto *BO = dyn_cast<BinaryOperator>(Copy); 595 if (BO && BO->getOpcode() == BO_Assign) { 596 // Perform simple memcpy for simple copying. 597 EmitAggregateAssign(DestAddr, SrcAddr, OriginalType); 598 } else { 599 // For arrays with complex element types perform element by element 600 // copying. 601 EmitOMPAggregateAssign( 602 DestAddr, SrcAddr, OriginalType, 603 [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) { 604 // Working with the single array element, so have to remap 605 // destination and source variables to corresponding array 606 // elements. 607 CodeGenFunction::OMPPrivateScope Remap(*this); 608 Remap.addPrivate(DestVD, [DestElement]() -> Address { 609 return DestElement; 610 }); 611 Remap.addPrivate( 612 SrcVD, [SrcElement]() -> Address { return SrcElement; }); 613 (void)Remap.Privatize(); 614 EmitIgnoredExpr(Copy); 615 }); 616 } 617 } else { 618 // Remap pseudo source variable to private copy. 619 CodeGenFunction::OMPPrivateScope Remap(*this); 620 Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; }); 621 Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; }); 622 (void)Remap.Privatize(); 623 // Emit copying of the whole variable. 624 EmitIgnoredExpr(Copy); 625 } 626 } 627 628 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, 629 OMPPrivateScope &PrivateScope) { 630 if (!HaveInsertPoint()) 631 return false; 632 bool FirstprivateIsLastprivate = false; 633 llvm::DenseSet<const VarDecl *> Lastprivates; 634 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 635 for (const auto *D : C->varlists()) 636 Lastprivates.insert( 637 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); 638 } 639 llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; 640 CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt())); 641 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { 642 auto IRef = C->varlist_begin(); 643 auto InitsRef = C->inits().begin(); 644 for (auto IInit : C->private_copies()) { 645 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 646 bool ThisFirstprivateIsLastprivate = 647 Lastprivates.count(OrigVD->getCanonicalDecl()) > 0; 648 auto *CapFD = CapturesInfo.lookup(OrigVD); 649 auto *FD = CapturedStmtInfo->lookup(OrigVD); 650 if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) && 651 !FD->getType()->isReferenceType()) { 652 EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); 653 ++IRef; 654 ++InitsRef; 655 continue; 656 } 657 FirstprivateIsLastprivate = 658 FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate; 659 if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { 660 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 661 auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); 662 bool IsRegistered; 663 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 664 /*RefersToEnclosingVariableOrCapture=*/FD != nullptr, 665 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 666 Address OriginalAddr = EmitLValue(&DRE).getAddress(); 667 QualType Type = VD->getType(); 668 if (Type->isArrayType()) { 669 // Emit VarDecl with copy init for arrays. 670 // Get the address of the original variable captured in current 671 // captured region. 672 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 673 auto Emission = EmitAutoVarAlloca(*VD); 674 auto *Init = VD->getInit(); 675 if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) { 676 // Perform simple memcpy. 677 EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr, 678 Type); 679 } else { 680 EmitOMPAggregateAssign( 681 Emission.getAllocatedAddress(), OriginalAddr, Type, 682 [this, VDInit, Init](Address DestElement, 683 Address SrcElement) { 684 // Clean up any temporaries needed by the initialization. 685 RunCleanupsScope InitScope(*this); 686 // Emit initialization for single element. 687 setAddrOfLocalVar(VDInit, SrcElement); 688 EmitAnyExprToMem(Init, DestElement, 689 Init->getType().getQualifiers(), 690 /*IsInitializer*/ false); 691 LocalDeclMap.erase(VDInit); 692 }); 693 } 694 EmitAutoVarCleanups(Emission); 695 return Emission.getAllocatedAddress(); 696 }); 697 } else { 698 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 699 // Emit private VarDecl with copy init. 700 // Remap temp VDInit variable to the address of the original 701 // variable 702 // (for proper handling of captured global variables). 703 setAddrOfLocalVar(VDInit, OriginalAddr); 704 EmitDecl(*VD); 705 LocalDeclMap.erase(VDInit); 706 return GetAddrOfLocalVar(VD); 707 }); 708 } 709 assert(IsRegistered && 710 "firstprivate var already registered as private"); 711 // Silence the warning about unused variable. 712 (void)IsRegistered; 713 } 714 ++IRef; 715 ++InitsRef; 716 } 717 } 718 return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty(); 719 } 720 721 void CodeGenFunction::EmitOMPPrivateClause( 722 const OMPExecutableDirective &D, 723 CodeGenFunction::OMPPrivateScope &PrivateScope) { 724 if (!HaveInsertPoint()) 725 return; 726 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 727 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) { 728 auto IRef = C->varlist_begin(); 729 for (auto IInit : C->private_copies()) { 730 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 731 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 732 auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 733 bool IsRegistered = 734 PrivateScope.addPrivate(OrigVD, [&]() -> Address { 735 // Emit private VarDecl with copy init. 736 EmitDecl(*VD); 737 return GetAddrOfLocalVar(VD); 738 }); 739 assert(IsRegistered && "private var already registered as private"); 740 // Silence the warning about unused variable. 741 (void)IsRegistered; 742 } 743 ++IRef; 744 } 745 } 746 } 747 748 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { 749 if (!HaveInsertPoint()) 750 return false; 751 // threadprivate_var1 = master_threadprivate_var1; 752 // operator=(threadprivate_var2, master_threadprivate_var2); 753 // ... 754 // __kmpc_barrier(&loc, global_tid); 755 llvm::DenseSet<const VarDecl *> CopiedVars; 756 llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; 757 for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) { 758 auto IRef = C->varlist_begin(); 759 auto ISrcRef = C->source_exprs().begin(); 760 auto IDestRef = C->destination_exprs().begin(); 761 for (auto *AssignOp : C->assignment_ops()) { 762 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 763 QualType Type = VD->getType(); 764 if (CopiedVars.insert(VD->getCanonicalDecl()).second) { 765 // Get the address of the master variable. If we are emitting code with 766 // TLS support, the address is passed from the master as field in the 767 // captured declaration. 768 Address MasterAddr = Address::invalid(); 769 if (getLangOpts().OpenMPUseTLS && 770 getContext().getTargetInfo().isTLSSupported()) { 771 assert(CapturedStmtInfo->lookup(VD) && 772 "Copyin threadprivates should have been captured!"); 773 DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(), 774 VK_LValue, (*IRef)->getExprLoc()); 775 MasterAddr = EmitLValue(&DRE).getAddress(); 776 LocalDeclMap.erase(VD); 777 } else { 778 MasterAddr = 779 Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) 780 : CGM.GetAddrOfGlobal(VD), 781 getContext().getDeclAlign(VD)); 782 } 783 // Get the address of the threadprivate variable. 784 Address PrivateAddr = EmitLValue(*IRef).getAddress(); 785 if (CopiedVars.size() == 1) { 786 // At first check if current thread is a master thread. If it is, no 787 // need to copy data. 788 CopyBegin = createBasicBlock("copyin.not.master"); 789 CopyEnd = createBasicBlock("copyin.not.master.end"); 790 Builder.CreateCondBr( 791 Builder.CreateICmpNE( 792 Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy), 793 Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)), 794 CopyBegin, CopyEnd); 795 EmitBlock(CopyBegin); 796 } 797 auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 798 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 799 EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp); 800 } 801 ++IRef; 802 ++ISrcRef; 803 ++IDestRef; 804 } 805 } 806 if (CopyEnd) { 807 // Exit out of copying procedure for non-master thread. 808 EmitBlock(CopyEnd, /*IsFinished=*/true); 809 return true; 810 } 811 return false; 812 } 813 814 bool CodeGenFunction::EmitOMPLastprivateClauseInit( 815 const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { 816 if (!HaveInsertPoint()) 817 return false; 818 bool HasAtLeastOneLastprivate = false; 819 llvm::DenseSet<const VarDecl *> SIMDLCVs; 820 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 821 auto *LoopDirective = cast<OMPLoopDirective>(&D); 822 for (auto *C : LoopDirective->counters()) { 823 SIMDLCVs.insert( 824 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 825 } 826 } 827 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; 828 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 829 HasAtLeastOneLastprivate = true; 830 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) 831 break; 832 auto IRef = C->varlist_begin(); 833 auto IDestRef = C->destination_exprs().begin(); 834 for (auto *IInit : C->private_copies()) { 835 // Keep the address of the original variable for future update at the end 836 // of the loop. 837 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 838 // Taskloops do not require additional initialization, it is done in 839 // runtime support library. 840 if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { 841 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 842 PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address { 843 DeclRefExpr DRE( 844 const_cast<VarDecl *>(OrigVD), 845 /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( 846 OrigVD) != nullptr, 847 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 848 return EmitLValue(&DRE).getAddress(); 849 }); 850 // Check if the variable is also a firstprivate: in this case IInit is 851 // not generated. Initialization of this variable will happen in codegen 852 // for 'firstprivate' clause. 853 if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) { 854 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 855 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 856 // Emit private VarDecl with copy init. 857 EmitDecl(*VD); 858 return GetAddrOfLocalVar(VD); 859 }); 860 assert(IsRegistered && 861 "lastprivate var already registered as private"); 862 (void)IsRegistered; 863 } 864 } 865 ++IRef; 866 ++IDestRef; 867 } 868 } 869 return HasAtLeastOneLastprivate; 870 } 871 872 void CodeGenFunction::EmitOMPLastprivateClauseFinal( 873 const OMPExecutableDirective &D, bool NoFinals, 874 llvm::Value *IsLastIterCond) { 875 if (!HaveInsertPoint()) 876 return; 877 // Emit following code: 878 // if (<IsLastIterCond>) { 879 // orig_var1 = private_orig_var1; 880 // ... 881 // orig_varn = private_orig_varn; 882 // } 883 llvm::BasicBlock *ThenBB = nullptr; 884 llvm::BasicBlock *DoneBB = nullptr; 885 if (IsLastIterCond) { 886 ThenBB = createBasicBlock(".omp.lastprivate.then"); 887 DoneBB = createBasicBlock(".omp.lastprivate.done"); 888 Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB); 889 EmitBlock(ThenBB); 890 } 891 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; 892 llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates; 893 if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) { 894 auto IC = LoopDirective->counters().begin(); 895 for (auto F : LoopDirective->finals()) { 896 auto *D = 897 cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl(); 898 if (NoFinals) 899 AlreadyEmittedVars.insert(D); 900 else 901 LoopCountersAndUpdates[D] = F; 902 ++IC; 903 } 904 } 905 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 906 auto IRef = C->varlist_begin(); 907 auto ISrcRef = C->source_exprs().begin(); 908 auto IDestRef = C->destination_exprs().begin(); 909 for (auto *AssignOp : C->assignment_ops()) { 910 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 911 QualType Type = PrivateVD->getType(); 912 auto *CanonicalVD = PrivateVD->getCanonicalDecl(); 913 if (AlreadyEmittedVars.insert(CanonicalVD).second) { 914 // If lastprivate variable is a loop control variable for loop-based 915 // directive, update its value before copyin back to original 916 // variable. 917 if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) 918 EmitIgnoredExpr(FinalExpr); 919 auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 920 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 921 // Get the address of the original variable. 922 Address OriginalAddr = GetAddrOfLocalVar(DestVD); 923 // Get the address of the private variable. 924 Address PrivateAddr = GetAddrOfLocalVar(PrivateVD); 925 if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>()) 926 PrivateAddr = 927 Address(Builder.CreateLoad(PrivateAddr), 928 getNaturalTypeAlignment(RefTy->getPointeeType())); 929 EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp); 930 } 931 ++IRef; 932 ++ISrcRef; 933 ++IDestRef; 934 } 935 if (auto *PostUpdate = C->getPostUpdateExpr()) 936 EmitIgnoredExpr(PostUpdate); 937 } 938 if (IsLastIterCond) 939 EmitBlock(DoneBB, /*IsFinished=*/true); 940 } 941 942 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 943 LValue BaseLV, llvm::Value *Addr) { 944 Address Tmp = Address::invalid(); 945 Address TopTmp = Address::invalid(); 946 Address MostTopTmp = Address::invalid(); 947 BaseTy = BaseTy.getNonReferenceType(); 948 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 949 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 950 Tmp = CGF.CreateMemTemp(BaseTy); 951 if (TopTmp.isValid()) 952 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 953 else 954 MostTopTmp = Tmp; 955 TopTmp = Tmp; 956 BaseTy = BaseTy->getPointeeType(); 957 } 958 llvm::Type *Ty = BaseLV.getPointer()->getType(); 959 if (Tmp.isValid()) 960 Ty = Tmp.getElementType(); 961 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 962 if (Tmp.isValid()) { 963 CGF.Builder.CreateStore(Addr, Tmp); 964 return MostTopTmp; 965 } 966 return Address(Addr, BaseLV.getAlignment()); 967 } 968 969 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 970 LValue BaseLV) { 971 BaseTy = BaseTy.getNonReferenceType(); 972 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 973 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 974 if (auto *PtrTy = BaseTy->getAs<PointerType>()) 975 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); 976 else { 977 BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(), 978 BaseTy->castAs<ReferenceType>()); 979 } 980 BaseTy = BaseTy->getPointeeType(); 981 } 982 return CGF.MakeAddrLValue( 983 Address( 984 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 985 BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()), 986 BaseLV.getAlignment()), 987 BaseLV.getType(), BaseLV.getAlignmentSource()); 988 } 989 990 void CodeGenFunction::EmitOMPReductionClauseInit( 991 const OMPExecutableDirective &D, 992 CodeGenFunction::OMPPrivateScope &PrivateScope) { 993 if (!HaveInsertPoint()) 994 return; 995 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 996 auto ILHS = C->lhs_exprs().begin(); 997 auto IRHS = C->rhs_exprs().begin(); 998 auto IPriv = C->privates().begin(); 999 auto IRed = C->reduction_ops().begin(); 1000 for (auto IRef : C->varlists()) { 1001 auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 1002 auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 1003 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); 1004 auto *DRD = getReductionInit(*IRed); 1005 if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) { 1006 auto *Base = OASE->getBase()->IgnoreParenImpCasts(); 1007 while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1008 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 1009 while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1010 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1011 auto *DE = cast<DeclRefExpr>(Base); 1012 auto *OrigVD = cast<VarDecl>(DE->getDecl()); 1013 auto OASELValueLB = EmitOMPArraySectionExpr(OASE); 1014 auto OASELValueUB = 1015 EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 1016 auto OriginalBaseLValue = EmitLValue(DE); 1017 LValue BaseLValue = 1018 loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(), 1019 OriginalBaseLValue); 1020 // Store the address of the original variable associated with the LHS 1021 // implicit variable. 1022 PrivateScope.addPrivate(LHSVD, [OASELValueLB]() -> Address { 1023 return OASELValueLB.getAddress(); 1024 }); 1025 // Emit reduction copy. 1026 bool IsRegistered = PrivateScope.addPrivate( 1027 OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB, 1028 OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address { 1029 // Emit VarDecl with copy init for arrays. 1030 // Get the address of the original variable captured in current 1031 // captured region. 1032 auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(), 1033 OASELValueLB.getPointer()); 1034 Size = Builder.CreateNUWAdd( 1035 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 1036 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1037 *this, cast<OpaqueValueExpr>( 1038 getContext() 1039 .getAsVariableArrayType(PrivateVD->getType()) 1040 ->getSizeExpr()), 1041 RValue::get(Size)); 1042 EmitVariablyModifiedType(PrivateVD->getType()); 1043 auto Emission = EmitAutoVarAlloca(*PrivateVD); 1044 auto Addr = Emission.getAllocatedAddress(); 1045 auto *Init = PrivateVD->getInit(); 1046 EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), 1047 DRD ? *IRed : Init, 1048 OASELValueLB.getAddress()); 1049 EmitAutoVarCleanups(Emission); 1050 // Emit private VarDecl with reduction init. 1051 auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), 1052 OASELValueLB.getPointer()); 1053 auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); 1054 return castToBase(*this, OrigVD->getType(), 1055 OASELValueLB.getType(), OriginalBaseLValue, 1056 Ptr); 1057 }); 1058 assert(IsRegistered && "private var already registered as private"); 1059 // Silence the warning about unused variable. 1060 (void)IsRegistered; 1061 PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { 1062 return GetAddrOfLocalVar(PrivateVD); 1063 }); 1064 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) { 1065 auto *Base = ASE->getBase()->IgnoreParenImpCasts(); 1066 while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1067 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1068 auto *DE = cast<DeclRefExpr>(Base); 1069 auto *OrigVD = cast<VarDecl>(DE->getDecl()); 1070 auto ASELValue = EmitLValue(ASE); 1071 auto OriginalBaseLValue = EmitLValue(DE); 1072 LValue BaseLValue = loadToBegin( 1073 *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue); 1074 // Store the address of the original variable associated with the LHS 1075 // implicit variable. 1076 PrivateScope.addPrivate( 1077 LHSVD, [ASELValue]() -> Address { return ASELValue.getAddress(); }); 1078 // Emit reduction copy. 1079 bool IsRegistered = PrivateScope.addPrivate( 1080 OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue, 1081 OriginalBaseLValue, DRD, IRed]() -> Address { 1082 // Emit private VarDecl with reduction init. 1083 AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); 1084 auto Addr = Emission.getAllocatedAddress(); 1085 if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 1086 emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, 1087 ASELValue.getAddress(), 1088 ASELValue.getType()); 1089 } else 1090 EmitAutoVarInit(Emission); 1091 EmitAutoVarCleanups(Emission); 1092 auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), 1093 ASELValue.getPointer()); 1094 auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); 1095 return castToBase(*this, OrigVD->getType(), ASELValue.getType(), 1096 OriginalBaseLValue, Ptr); 1097 }); 1098 assert(IsRegistered && "private var already registered as private"); 1099 // Silence the warning about unused variable. 1100 (void)IsRegistered; 1101 PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { 1102 return Builder.CreateElementBitCast( 1103 GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()), 1104 "rhs.begin"); 1105 }); 1106 } else { 1107 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); 1108 QualType Type = PrivateVD->getType(); 1109 if (getContext().getAsArrayType(Type)) { 1110 // Store the address of the original variable associated with the LHS 1111 // implicit variable. 1112 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 1113 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1114 IRef->getType(), VK_LValue, IRef->getExprLoc()); 1115 Address OriginalAddr = EmitLValue(&DRE).getAddress(); 1116 PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr, 1117 LHSVD]() -> Address { 1118 OriginalAddr = Builder.CreateElementBitCast( 1119 OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); 1120 return OriginalAddr; 1121 }); 1122 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 1123 if (Type->isVariablyModifiedType()) { 1124 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1125 *this, cast<OpaqueValueExpr>( 1126 getContext() 1127 .getAsVariableArrayType(PrivateVD->getType()) 1128 ->getSizeExpr()), 1129 RValue::get( 1130 getTypeSize(OrigVD->getType().getNonReferenceType()))); 1131 EmitVariablyModifiedType(Type); 1132 } 1133 auto Emission = EmitAutoVarAlloca(*PrivateVD); 1134 auto Addr = Emission.getAllocatedAddress(); 1135 auto *Init = PrivateVD->getInit(); 1136 EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), 1137 DRD ? *IRed : Init, OriginalAddr); 1138 EmitAutoVarCleanups(Emission); 1139 return Emission.getAllocatedAddress(); 1140 }); 1141 assert(IsRegistered && "private var already registered as private"); 1142 // Silence the warning about unused variable. 1143 (void)IsRegistered; 1144 PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { 1145 return Builder.CreateElementBitCast( 1146 GetAddrOfLocalVar(PrivateVD), 1147 ConvertTypeForMem(RHSVD->getType()), "rhs.begin"); 1148 }); 1149 } else { 1150 // Store the address of the original variable associated with the LHS 1151 // implicit variable. 1152 Address OriginalAddr = Address::invalid(); 1153 PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef, 1154 &OriginalAddr]() -> Address { 1155 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 1156 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1157 IRef->getType(), VK_LValue, IRef->getExprLoc()); 1158 OriginalAddr = EmitLValue(&DRE).getAddress(); 1159 return OriginalAddr; 1160 }); 1161 // Emit reduction copy. 1162 bool IsRegistered = PrivateScope.addPrivate( 1163 OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address { 1164 // Emit private VarDecl with reduction init. 1165 AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); 1166 auto Addr = Emission.getAllocatedAddress(); 1167 if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 1168 emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, 1169 OriginalAddr, 1170 PrivateVD->getType()); 1171 } else 1172 EmitAutoVarInit(Emission); 1173 EmitAutoVarCleanups(Emission); 1174 return Addr; 1175 }); 1176 assert(IsRegistered && "private var already registered as private"); 1177 // Silence the warning about unused variable. 1178 (void)IsRegistered; 1179 PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { 1180 return GetAddrOfLocalVar(PrivateVD); 1181 }); 1182 } 1183 } 1184 ++ILHS; 1185 ++IRHS; 1186 ++IPriv; 1187 ++IRed; 1188 } 1189 } 1190 } 1191 1192 void CodeGenFunction::EmitOMPReductionClauseFinal( 1193 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) { 1194 if (!HaveInsertPoint()) 1195 return; 1196 llvm::SmallVector<const Expr *, 8> Privates; 1197 llvm::SmallVector<const Expr *, 8> LHSExprs; 1198 llvm::SmallVector<const Expr *, 8> RHSExprs; 1199 llvm::SmallVector<const Expr *, 8> ReductionOps; 1200 bool HasAtLeastOneReduction = false; 1201 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1202 HasAtLeastOneReduction = true; 1203 Privates.append(C->privates().begin(), C->privates().end()); 1204 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 1205 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 1206 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 1207 } 1208 if (HasAtLeastOneReduction) { 1209 bool WithNowait = D.getSingleClause<OMPNowaitClause>() || 1210 isOpenMPParallelDirective(D.getDirectiveKind()) || 1211 D.getDirectiveKind() == OMPD_simd; 1212 bool SimpleReduction = D.getDirectiveKind() == OMPD_simd; 1213 // Emit nowait reduction if nowait clause is present or directive is a 1214 // parallel directive (it always has implicit barrier). 1215 CGM.getOpenMPRuntime().emitReduction( 1216 *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps, 1217 {WithNowait, SimpleReduction, ReductionKind}); 1218 } 1219 } 1220 1221 static void emitPostUpdateForReductionClause( 1222 CodeGenFunction &CGF, const OMPExecutableDirective &D, 1223 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1224 if (!CGF.HaveInsertPoint()) 1225 return; 1226 llvm::BasicBlock *DoneBB = nullptr; 1227 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1228 if (auto *PostUpdate = C->getPostUpdateExpr()) { 1229 if (!DoneBB) { 1230 if (auto *Cond = CondGen(CGF)) { 1231 // If the first post-update expression is found, emit conditional 1232 // block if it was requested. 1233 auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu"); 1234 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done"); 1235 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1236 CGF.EmitBlock(ThenBB); 1237 } 1238 } 1239 CGF.EmitIgnoredExpr(PostUpdate); 1240 } 1241 } 1242 if (DoneBB) 1243 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 1244 } 1245 1246 static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, 1247 const OMPExecutableDirective &S, 1248 OpenMPDirectiveKind InnermostKind, 1249 const RegionCodeGenTy &CodeGen) { 1250 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); 1251 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( 1252 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 1253 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { 1254 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 1255 auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 1256 /*IgnoreResultAssign*/ true); 1257 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( 1258 CGF, NumThreads, NumThreadsClause->getLocStart()); 1259 } 1260 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { 1261 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); 1262 CGF.CGM.getOpenMPRuntime().emitProcBindClause( 1263 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart()); 1264 } 1265 const Expr *IfCond = nullptr; 1266 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 1267 if (C->getNameModifier() == OMPD_unknown || 1268 C->getNameModifier() == OMPD_parallel) { 1269 IfCond = C->getCondition(); 1270 break; 1271 } 1272 } 1273 1274 OMPParallelScope Scope(CGF, S); 1275 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 1276 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 1277 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, 1278 CapturedVars, IfCond); 1279 } 1280 1281 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { 1282 // Emit parallel region as a standalone region. 1283 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1284 OMPPrivateScope PrivateScope(CGF); 1285 bool Copyins = CGF.EmitOMPCopyinClause(S); 1286 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 1287 if (Copyins) { 1288 // Emit implicit barrier to synchronize threads and avoid data races on 1289 // propagation master's thread values of threadprivate variables to local 1290 // instances of that variables of all other implicit threads. 1291 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 1292 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 1293 /*ForceSimpleCall=*/true); 1294 } 1295 CGF.EmitOMPPrivateClause(S, PrivateScope); 1296 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 1297 (void)PrivateScope.Privatize(); 1298 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1299 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 1300 }; 1301 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen); 1302 emitPostUpdateForReductionClause( 1303 *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1304 } 1305 1306 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, 1307 JumpDest LoopExit) { 1308 RunCleanupsScope BodyScope(*this); 1309 // Update counters values on current iteration. 1310 for (auto I : D.updates()) { 1311 EmitIgnoredExpr(I); 1312 } 1313 // Update the linear variables. 1314 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1315 for (auto *U : C->updates()) 1316 EmitIgnoredExpr(U); 1317 } 1318 1319 // On a continue in the body, jump to the end. 1320 auto Continue = getJumpDestInCurrentScope("omp.body.continue"); 1321 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1322 // Emit loop body. 1323 EmitStmt(D.getBody()); 1324 // The end (updates/cleanups). 1325 EmitBlock(Continue.getBlock()); 1326 BreakContinueStack.pop_back(); 1327 } 1328 1329 void CodeGenFunction::EmitOMPInnerLoop( 1330 const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, 1331 const Expr *IncExpr, 1332 const llvm::function_ref<void(CodeGenFunction &)> &BodyGen, 1333 const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) { 1334 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); 1335 1336 // Start the loop with a block that tests the condition. 1337 auto CondBlock = createBasicBlock("omp.inner.for.cond"); 1338 EmitBlock(CondBlock); 1339 const SourceRange &R = S.getSourceRange(); 1340 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1341 SourceLocToDebugLoc(R.getEnd())); 1342 1343 // If there are any cleanups between here and the loop-exit scope, 1344 // create a block to stage a loop exit along. 1345 auto ExitBlock = LoopExit.getBlock(); 1346 if (RequiresCleanup) 1347 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); 1348 1349 auto LoopBody = createBasicBlock("omp.inner.for.body"); 1350 1351 // Emit condition. 1352 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); 1353 if (ExitBlock != LoopExit.getBlock()) { 1354 EmitBlock(ExitBlock); 1355 EmitBranchThroughCleanup(LoopExit); 1356 } 1357 1358 EmitBlock(LoopBody); 1359 incrementProfileCounter(&S); 1360 1361 // Create a block for the increment. 1362 auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); 1363 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1364 1365 BodyGen(*this); 1366 1367 // Emit "IV = IV + 1" and a back-edge to the condition block. 1368 EmitBlock(Continue.getBlock()); 1369 EmitIgnoredExpr(IncExpr); 1370 PostIncGen(*this); 1371 BreakContinueStack.pop_back(); 1372 EmitBranch(CondBlock); 1373 LoopStack.pop(); 1374 // Emit the fall-through block. 1375 EmitBlock(LoopExit.getBlock()); 1376 } 1377 1378 void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { 1379 if (!HaveInsertPoint()) 1380 return; 1381 // Emit inits for the linear variables. 1382 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1383 for (auto *Init : C->inits()) { 1384 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); 1385 if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { 1386 AutoVarEmission Emission = EmitAutoVarAlloca(*VD); 1387 auto *OrigVD = cast<VarDecl>(Ref->getDecl()); 1388 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 1389 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1390 VD->getInit()->getType(), VK_LValue, 1391 VD->getInit()->getExprLoc()); 1392 EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), 1393 VD->getType()), 1394 /*capturedByInit=*/false); 1395 EmitAutoVarCleanups(Emission); 1396 } else 1397 EmitVarDecl(*VD); 1398 } 1399 // Emit the linear steps for the linear clauses. 1400 // If a step is not constant, it is pre-calculated before the loop. 1401 if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep())) 1402 if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) { 1403 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); 1404 // Emit calculation of the linear step. 1405 EmitIgnoredExpr(CS); 1406 } 1407 } 1408 } 1409 1410 void CodeGenFunction::EmitOMPLinearClauseFinal( 1411 const OMPLoopDirective &D, 1412 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1413 if (!HaveInsertPoint()) 1414 return; 1415 llvm::BasicBlock *DoneBB = nullptr; 1416 // Emit the final values of the linear variables. 1417 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1418 auto IC = C->varlist_begin(); 1419 for (auto *F : C->finals()) { 1420 if (!DoneBB) { 1421 if (auto *Cond = CondGen(*this)) { 1422 // If the first post-update expression is found, emit conditional 1423 // block if it was requested. 1424 auto *ThenBB = createBasicBlock(".omp.linear.pu"); 1425 DoneBB = createBasicBlock(".omp.linear.pu.done"); 1426 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1427 EmitBlock(ThenBB); 1428 } 1429 } 1430 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); 1431 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 1432 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1433 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); 1434 Address OrigAddr = EmitLValue(&DRE).getAddress(); 1435 CodeGenFunction::OMPPrivateScope VarScope(*this); 1436 VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; }); 1437 (void)VarScope.Privatize(); 1438 EmitIgnoredExpr(F); 1439 ++IC; 1440 } 1441 if (auto *PostUpdate = C->getPostUpdateExpr()) 1442 EmitIgnoredExpr(PostUpdate); 1443 } 1444 if (DoneBB) 1445 EmitBlock(DoneBB, /*IsFinished=*/true); 1446 } 1447 1448 static void emitAlignedClause(CodeGenFunction &CGF, 1449 const OMPExecutableDirective &D) { 1450 if (!CGF.HaveInsertPoint()) 1451 return; 1452 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { 1453 unsigned ClauseAlignment = 0; 1454 if (auto AlignmentExpr = Clause->getAlignment()) { 1455 auto AlignmentCI = 1456 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); 1457 ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue()); 1458 } 1459 for (auto E : Clause->varlists()) { 1460 unsigned Alignment = ClauseAlignment; 1461 if (Alignment == 0) { 1462 // OpenMP [2.8.1, Description] 1463 // If no optional parameter is specified, implementation-defined default 1464 // alignments for SIMD instructions on the target platforms are assumed. 1465 Alignment = 1466 CGF.getContext() 1467 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( 1468 E->getType()->getPointeeType())) 1469 .getQuantity(); 1470 } 1471 assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) && 1472 "alignment is not power of 2"); 1473 if (Alignment != 0) { 1474 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 1475 CGF.EmitAlignmentAssumption(PtrValue, Alignment); 1476 } 1477 } 1478 } 1479 } 1480 1481 void CodeGenFunction::EmitOMPPrivateLoopCounters( 1482 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { 1483 if (!HaveInsertPoint()) 1484 return; 1485 auto I = S.private_counters().begin(); 1486 for (auto *E : S.counters()) { 1487 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1488 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 1489 (void)LoopScope.addPrivate(VD, [&]() -> Address { 1490 // Emit var without initialization. 1491 if (!LocalDeclMap.count(PrivateVD)) { 1492 auto VarEmission = EmitAutoVarAlloca(*PrivateVD); 1493 EmitAutoVarCleanups(VarEmission); 1494 } 1495 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), 1496 /*RefersToEnclosingVariableOrCapture=*/false, 1497 (*I)->getType(), VK_LValue, (*I)->getExprLoc()); 1498 return EmitLValue(&DRE).getAddress(); 1499 }); 1500 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || 1501 VD->hasGlobalStorage()) { 1502 (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address { 1503 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 1504 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), 1505 E->getType(), VK_LValue, E->getExprLoc()); 1506 return EmitLValue(&DRE).getAddress(); 1507 }); 1508 } 1509 ++I; 1510 } 1511 } 1512 1513 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 1514 const Expr *Cond, llvm::BasicBlock *TrueBlock, 1515 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 1516 if (!CGF.HaveInsertPoint()) 1517 return; 1518 { 1519 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 1520 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); 1521 (void)PreCondScope.Privatize(); 1522 // Get initial values of real counters. 1523 for (auto I : S.inits()) { 1524 CGF.EmitIgnoredExpr(I); 1525 } 1526 } 1527 // Check that loop is executed at least one time. 1528 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); 1529 } 1530 1531 void CodeGenFunction::EmitOMPLinearClause( 1532 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { 1533 if (!HaveInsertPoint()) 1534 return; 1535 llvm::DenseSet<const VarDecl *> SIMDLCVs; 1536 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 1537 auto *LoopDirective = cast<OMPLoopDirective>(&D); 1538 for (auto *C : LoopDirective->counters()) { 1539 SIMDLCVs.insert( 1540 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 1541 } 1542 } 1543 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1544 auto CurPrivate = C->privates().begin(); 1545 for (auto *E : C->varlists()) { 1546 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1547 auto *PrivateVD = 1548 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); 1549 if (!SIMDLCVs.count(VD->getCanonicalDecl())) { 1550 bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address { 1551 // Emit private VarDecl with copy init. 1552 EmitVarDecl(*PrivateVD); 1553 return GetAddrOfLocalVar(PrivateVD); 1554 }); 1555 assert(IsRegistered && "linear var already registered as private"); 1556 // Silence the warning about unused variable. 1557 (void)IsRegistered; 1558 } else 1559 EmitVarDecl(*PrivateVD); 1560 ++CurPrivate; 1561 } 1562 } 1563 } 1564 1565 static void emitSimdlenSafelenClause(CodeGenFunction &CGF, 1566 const OMPExecutableDirective &D, 1567 bool IsMonotonic) { 1568 if (!CGF.HaveInsertPoint()) 1569 return; 1570 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { 1571 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 1572 /*ignoreResult=*/true); 1573 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1574 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1575 // In presence of finite 'safelen', it may be unsafe to mark all 1576 // the memory instructions parallel, because loop-carried 1577 // dependences of 'safelen' iterations are possible. 1578 if (!IsMonotonic) 1579 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); 1580 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { 1581 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), 1582 /*ignoreResult=*/true); 1583 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1584 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1585 // In presence of finite 'safelen', it may be unsafe to mark all 1586 // the memory instructions parallel, because loop-carried 1587 // dependences of 'safelen' iterations are possible. 1588 CGF.LoopStack.setParallel(false); 1589 } 1590 } 1591 1592 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, 1593 bool IsMonotonic) { 1594 // Walk clauses and process safelen/lastprivate. 1595 LoopStack.setParallel(!IsMonotonic); 1596 LoopStack.setVectorizeEnable(true); 1597 emitSimdlenSafelenClause(*this, D, IsMonotonic); 1598 } 1599 1600 void CodeGenFunction::EmitOMPSimdFinal( 1601 const OMPLoopDirective &D, 1602 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1603 if (!HaveInsertPoint()) 1604 return; 1605 llvm::BasicBlock *DoneBB = nullptr; 1606 auto IC = D.counters().begin(); 1607 auto IPC = D.private_counters().begin(); 1608 for (auto F : D.finals()) { 1609 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); 1610 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl()); 1611 auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD); 1612 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) || 1613 OrigVD->hasGlobalStorage() || CED) { 1614 if (!DoneBB) { 1615 if (auto *Cond = CondGen(*this)) { 1616 // If the first post-update expression is found, emit conditional 1617 // block if it was requested. 1618 auto *ThenBB = createBasicBlock(".omp.final.then"); 1619 DoneBB = createBasicBlock(".omp.final.done"); 1620 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1621 EmitBlock(ThenBB); 1622 } 1623 } 1624 Address OrigAddr = Address::invalid(); 1625 if (CED) 1626 OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); 1627 else { 1628 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), 1629 /*RefersToEnclosingVariableOrCapture=*/false, 1630 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); 1631 OrigAddr = EmitLValue(&DRE).getAddress(); 1632 } 1633 OMPPrivateScope VarScope(*this); 1634 VarScope.addPrivate(OrigVD, 1635 [OrigAddr]() -> Address { return OrigAddr; }); 1636 (void)VarScope.Privatize(); 1637 EmitIgnoredExpr(F); 1638 } 1639 ++IC; 1640 ++IPC; 1641 } 1642 if (DoneBB) 1643 EmitBlock(DoneBB, /*IsFinished=*/true); 1644 } 1645 1646 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 1647 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1648 OMPLoopScope PreInitScope(CGF, S); 1649 // if (PreCond) { 1650 // for (IV in 0..LastIteration) BODY; 1651 // <Final counter/linear vars updates>; 1652 // } 1653 // 1654 1655 // Emit: if (PreCond) - begin. 1656 // If the condition constant folds and can be elided, avoid emitting the 1657 // whole loop. 1658 bool CondConstant; 1659 llvm::BasicBlock *ContBlock = nullptr; 1660 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1661 if (!CondConstant) 1662 return; 1663 } else { 1664 auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); 1665 ContBlock = CGF.createBasicBlock("simd.if.end"); 1666 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 1667 CGF.getProfileCount(&S)); 1668 CGF.EmitBlock(ThenBlock); 1669 CGF.incrementProfileCounter(&S); 1670 } 1671 1672 // Emit the loop iteration variable. 1673 const Expr *IVExpr = S.getIterationVariable(); 1674 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 1675 CGF.EmitVarDecl(*IVDecl); 1676 CGF.EmitIgnoredExpr(S.getInit()); 1677 1678 // Emit the iterations count variable. 1679 // If it is not a variable, Sema decided to calculate iterations count on 1680 // each iteration (e.g., it is foldable into a constant). 1681 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1682 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1683 // Emit calculation of the iterations count. 1684 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 1685 } 1686 1687 CGF.EmitOMPSimdInit(S); 1688 1689 emitAlignedClause(CGF, S); 1690 CGF.EmitOMPLinearClauseInit(S); 1691 { 1692 OMPPrivateScope LoopScope(CGF); 1693 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 1694 CGF.EmitOMPLinearClause(S, LoopScope); 1695 CGF.EmitOMPPrivateClause(S, LoopScope); 1696 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1697 bool HasLastprivateClause = 1698 CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1699 (void)LoopScope.Privatize(); 1700 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1701 S.getInc(), 1702 [&S](CodeGenFunction &CGF) { 1703 CGF.EmitOMPLoopBody(S, JumpDest()); 1704 CGF.EmitStopPoint(&S); 1705 }, 1706 [](CodeGenFunction &) {}); 1707 CGF.EmitOMPSimdFinal( 1708 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1709 // Emit final copy of the lastprivate variables at the end of loops. 1710 if (HasLastprivateClause) 1711 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 1712 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); 1713 emitPostUpdateForReductionClause( 1714 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1715 } 1716 CGF.EmitOMPLinearClauseFinal( 1717 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1718 // Emit: if (PreCond) - end. 1719 if (ContBlock) { 1720 CGF.EmitBranch(ContBlock); 1721 CGF.EmitBlock(ContBlock, true); 1722 } 1723 }; 1724 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1725 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1726 } 1727 1728 void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, 1729 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1730 Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { 1731 auto &RT = CGM.getOpenMPRuntime(); 1732 1733 const Expr *IVExpr = S.getIterationVariable(); 1734 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1735 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1736 1737 auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 1738 1739 // Start the loop with a block that tests the condition. 1740 auto CondBlock = createBasicBlock("omp.dispatch.cond"); 1741 EmitBlock(CondBlock); 1742 const SourceRange &R = S.getSourceRange(); 1743 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1744 SourceLocToDebugLoc(R.getEnd())); 1745 1746 llvm::Value *BoolCondVal = nullptr; 1747 if (!DynamicOrOrdered) { 1748 // UB = min(UB, GlobalUB) 1749 EmitIgnoredExpr(S.getEnsureUpperBound()); 1750 // IV = LB 1751 EmitIgnoredExpr(S.getInit()); 1752 // IV < UB 1753 BoolCondVal = EvaluateExprAsBool(S.getCond()); 1754 } else { 1755 BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL, 1756 LB, UB, ST); 1757 } 1758 1759 // If there are any cleanups between here and the loop-exit scope, 1760 // create a block to stage a loop exit along. 1761 auto ExitBlock = LoopExit.getBlock(); 1762 if (LoopScope.requiresCleanups()) 1763 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 1764 1765 auto LoopBody = createBasicBlock("omp.dispatch.body"); 1766 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 1767 if (ExitBlock != LoopExit.getBlock()) { 1768 EmitBlock(ExitBlock); 1769 EmitBranchThroughCleanup(LoopExit); 1770 } 1771 EmitBlock(LoopBody); 1772 1773 // Emit "IV = LB" (in case of static schedule, we have already calculated new 1774 // LB for loop condition and emitted it above). 1775 if (DynamicOrOrdered) 1776 EmitIgnoredExpr(S.getInit()); 1777 1778 // Create a block for the increment. 1779 auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 1780 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1781 1782 // Generate !llvm.loop.parallel metadata for loads and stores for loops 1783 // with dynamic/guided scheduling and without ordered clause. 1784 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 1785 LoopStack.setParallel(!IsMonotonic); 1786 else 1787 EmitOMPSimdInit(S, IsMonotonic); 1788 1789 SourceLocation Loc = S.getLocStart(); 1790 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), 1791 [&S, LoopExit](CodeGenFunction &CGF) { 1792 CGF.EmitOMPLoopBody(S, LoopExit); 1793 CGF.EmitStopPoint(&S); 1794 }, 1795 [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) { 1796 if (Ordered) { 1797 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd( 1798 CGF, Loc, IVSize, IVSigned); 1799 } 1800 }); 1801 1802 EmitBlock(Continue.getBlock()); 1803 BreakContinueStack.pop_back(); 1804 if (!DynamicOrOrdered) { 1805 // Emit "LB = LB + Stride", "UB = UB + Stride". 1806 EmitIgnoredExpr(S.getNextLowerBound()); 1807 EmitIgnoredExpr(S.getNextUpperBound()); 1808 } 1809 1810 EmitBranch(CondBlock); 1811 LoopStack.pop(); 1812 // Emit the fall-through block. 1813 EmitBlock(LoopExit.getBlock()); 1814 1815 // Tell the runtime we are done. 1816 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { 1817 if (!DynamicOrOrdered) 1818 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 1819 }; 1820 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 1821 } 1822 1823 void CodeGenFunction::EmitOMPForOuterLoop( 1824 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 1825 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1826 Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { 1827 auto &RT = CGM.getOpenMPRuntime(); 1828 1829 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 1830 const bool DynamicOrOrdered = 1831 Ordered || RT.isDynamic(ScheduleKind.Schedule); 1832 1833 assert((Ordered || 1834 !RT.isStaticNonchunked(ScheduleKind.Schedule, 1835 /*Chunked=*/Chunk != nullptr)) && 1836 "static non-chunked schedule does not need outer loop"); 1837 1838 // Emit outer loop. 1839 // 1840 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1841 // When schedule(dynamic,chunk_size) is specified, the iterations are 1842 // distributed to threads in the team in chunks as the threads request them. 1843 // Each thread executes a chunk of iterations, then requests another chunk, 1844 // until no chunks remain to be distributed. Each chunk contains chunk_size 1845 // iterations, except for the last chunk to be distributed, which may have 1846 // fewer iterations. When no chunk_size is specified, it defaults to 1. 1847 // 1848 // When schedule(guided,chunk_size) is specified, the iterations are assigned 1849 // to threads in the team in chunks as the executing threads request them. 1850 // Each thread executes a chunk of iterations, then requests another chunk, 1851 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 1852 // each chunk is proportional to the number of unassigned iterations divided 1853 // by the number of threads in the team, decreasing to 1. For a chunk_size 1854 // with value k (greater than 1), the size of each chunk is determined in the 1855 // same way, with the restriction that the chunks do not contain fewer than k 1856 // iterations (except for the last chunk to be assigned, which may have fewer 1857 // than k iterations). 1858 // 1859 // When schedule(auto) is specified, the decision regarding scheduling is 1860 // delegated to the compiler and/or runtime system. The programmer gives the 1861 // implementation the freedom to choose any possible mapping of iterations to 1862 // threads in the team. 1863 // 1864 // When schedule(runtime) is specified, the decision regarding scheduling is 1865 // deferred until run time, and the schedule and chunk size are taken from the 1866 // run-sched-var ICV. If the ICV is set to auto, the schedule is 1867 // implementation defined 1868 // 1869 // while(__kmpc_dispatch_next(&LB, &UB)) { 1870 // idx = LB; 1871 // while (idx <= UB) { BODY; ++idx; 1872 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. 1873 // } // inner loop 1874 // } 1875 // 1876 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1877 // When schedule(static, chunk_size) is specified, iterations are divided into 1878 // chunks of size chunk_size, and the chunks are assigned to the threads in 1879 // the team in a round-robin fashion in the order of the thread number. 1880 // 1881 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { 1882 // while (idx <= UB) { BODY; ++idx; } // inner loop 1883 // LB = LB + ST; 1884 // UB = UB + ST; 1885 // } 1886 // 1887 1888 const Expr *IVExpr = S.getIterationVariable(); 1889 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1890 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1891 1892 if (DynamicOrOrdered) { 1893 llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration()); 1894 RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize, 1895 IVSigned, Ordered, UBVal, Chunk); 1896 } else { 1897 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, 1898 Ordered, IL, LB, UB, ST, Chunk); 1899 } 1900 1901 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB, 1902 ST, IL, Chunk); 1903 } 1904 1905 void CodeGenFunction::EmitOMPDistributeOuterLoop( 1906 OpenMPDistScheduleClauseKind ScheduleKind, 1907 const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, 1908 Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { 1909 1910 auto &RT = CGM.getOpenMPRuntime(); 1911 1912 // Emit outer loop. 1913 // Same behavior as a OMPForOuterLoop, except that schedule cannot be 1914 // dynamic 1915 // 1916 1917 const Expr *IVExpr = S.getIterationVariable(); 1918 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1919 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1920 1921 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, 1922 IVSize, IVSigned, /* Ordered = */ false, 1923 IL, LB, UB, ST, Chunk); 1924 1925 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, 1926 S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk); 1927 } 1928 1929 void CodeGenFunction::EmitOMPDistributeParallelForDirective( 1930 const OMPDistributeParallelForDirective &S) { 1931 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1932 CGM.getOpenMPRuntime().emitInlinedDirective( 1933 *this, OMPD_distribute_parallel_for, 1934 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1935 OMPLoopScope PreInitScope(CGF, S); 1936 OMPCancelStackRAII CancelRegion(CGF, OMPD_distribute_parallel_for, 1937 /*HasCancel=*/false); 1938 CGF.EmitStmt( 1939 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1940 }); 1941 } 1942 1943 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( 1944 const OMPDistributeParallelForSimdDirective &S) { 1945 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1946 CGM.getOpenMPRuntime().emitInlinedDirective( 1947 *this, OMPD_distribute_parallel_for_simd, 1948 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1949 OMPLoopScope PreInitScope(CGF, S); 1950 CGF.EmitStmt( 1951 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1952 }); 1953 } 1954 1955 void CodeGenFunction::EmitOMPDistributeSimdDirective( 1956 const OMPDistributeSimdDirective &S) { 1957 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1958 CGM.getOpenMPRuntime().emitInlinedDirective( 1959 *this, OMPD_distribute_simd, 1960 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1961 OMPLoopScope PreInitScope(CGF, S); 1962 CGF.EmitStmt( 1963 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1964 }); 1965 } 1966 1967 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( 1968 const OMPTargetParallelForSimdDirective &S) { 1969 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1970 CGM.getOpenMPRuntime().emitInlinedDirective( 1971 *this, OMPD_target_parallel_for_simd, 1972 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1973 OMPLoopScope PreInitScope(CGF, S); 1974 CGF.EmitStmt( 1975 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1976 }); 1977 } 1978 1979 void CodeGenFunction::EmitOMPTargetSimdDirective( 1980 const OMPTargetSimdDirective &S) { 1981 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1982 CGM.getOpenMPRuntime().emitInlinedDirective( 1983 *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1984 OMPLoopScope PreInitScope(CGF, S); 1985 CGF.EmitStmt( 1986 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1987 }); 1988 } 1989 1990 void CodeGenFunction::EmitOMPTeamsDistributeDirective( 1991 const OMPTeamsDistributeDirective &S) { 1992 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1993 CGM.getOpenMPRuntime().emitInlinedDirective( 1994 *this, OMPD_teams_distribute, 1995 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1996 OMPLoopScope PreInitScope(CGF, S); 1997 CGF.EmitStmt( 1998 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1999 }); 2000 } 2001 2002 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( 2003 const OMPTeamsDistributeSimdDirective &S) { 2004 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2005 CGM.getOpenMPRuntime().emitInlinedDirective( 2006 *this, OMPD_teams_distribute_simd, 2007 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2008 OMPLoopScope PreInitScope(CGF, S); 2009 CGF.EmitStmt( 2010 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2011 }); 2012 } 2013 2014 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( 2015 const OMPTeamsDistributeParallelForSimdDirective &S) { 2016 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2017 CGM.getOpenMPRuntime().emitInlinedDirective( 2018 *this, OMPD_teams_distribute_parallel_for_simd, 2019 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2020 OMPLoopScope PreInitScope(CGF, S); 2021 CGF.EmitStmt( 2022 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2023 }); 2024 } 2025 2026 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( 2027 const OMPTeamsDistributeParallelForDirective &S) { 2028 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2029 CGM.getOpenMPRuntime().emitInlinedDirective( 2030 *this, OMPD_teams_distribute_parallel_for, 2031 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2032 OMPLoopScope PreInitScope(CGF, S); 2033 CGF.EmitStmt( 2034 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2035 }); 2036 } 2037 2038 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( 2039 const OMPTargetTeamsDistributeDirective &S) { 2040 CGM.getOpenMPRuntime().emitInlinedDirective( 2041 *this, OMPD_target_teams_distribute, 2042 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2043 CGF.EmitStmt( 2044 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2045 }); 2046 } 2047 2048 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( 2049 const OMPTargetTeamsDistributeParallelForDirective &S) { 2050 CGM.getOpenMPRuntime().emitInlinedDirective( 2051 *this, OMPD_target_teams_distribute_parallel_for, 2052 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2053 CGF.EmitStmt( 2054 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2055 }); 2056 } 2057 2058 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( 2059 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 2060 CGM.getOpenMPRuntime().emitInlinedDirective( 2061 *this, OMPD_target_teams_distribute_parallel_for_simd, 2062 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2063 CGF.EmitStmt( 2064 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2065 }); 2066 } 2067 2068 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( 2069 const OMPTargetTeamsDistributeSimdDirective &S) { 2070 CGM.getOpenMPRuntime().emitInlinedDirective( 2071 *this, OMPD_target_teams_distribute_simd, 2072 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2073 CGF.EmitStmt( 2074 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2075 }); 2076 } 2077 2078 /// \brief Emit a helper variable and return corresponding lvalue. 2079 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 2080 const DeclRefExpr *Helper) { 2081 auto VDecl = cast<VarDecl>(Helper->getDecl()); 2082 CGF.EmitVarDecl(*VDecl); 2083 return CGF.EmitLValue(Helper); 2084 } 2085 2086 namespace { 2087 struct ScheduleKindModifiersTy { 2088 OpenMPScheduleClauseKind Kind; 2089 OpenMPScheduleClauseModifier M1; 2090 OpenMPScheduleClauseModifier M2; 2091 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, 2092 OpenMPScheduleClauseModifier M1, 2093 OpenMPScheduleClauseModifier M2) 2094 : Kind(Kind), M1(M1), M2(M2) {} 2095 }; 2096 } // namespace 2097 2098 bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { 2099 // Emit the loop iteration variable. 2100 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2101 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2102 EmitVarDecl(*IVDecl); 2103 2104 // Emit the iterations count variable. 2105 // If it is not a variable, Sema decided to calculate iterations count on each 2106 // iteration (e.g., it is foldable into a constant). 2107 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2108 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2109 // Emit calculation of the iterations count. 2110 EmitIgnoredExpr(S.getCalcLastIteration()); 2111 } 2112 2113 auto &RT = CGM.getOpenMPRuntime(); 2114 2115 bool HasLastprivateClause; 2116 // Check pre-condition. 2117 { 2118 OMPLoopScope PreInitScope(*this, S); 2119 // Skip the entire loop if we don't meet the precondition. 2120 // If the condition constant folds and can be elided, avoid emitting the 2121 // whole loop. 2122 bool CondConstant; 2123 llvm::BasicBlock *ContBlock = nullptr; 2124 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2125 if (!CondConstant) 2126 return false; 2127 } else { 2128 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2129 ContBlock = createBasicBlock("omp.precond.end"); 2130 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2131 getProfileCount(&S)); 2132 EmitBlock(ThenBlock); 2133 incrementProfileCounter(&S); 2134 } 2135 2136 bool Ordered = false; 2137 if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 2138 if (OrderedClause->getNumForLoops()) 2139 RT.emitDoacrossInit(*this, S); 2140 else 2141 Ordered = true; 2142 } 2143 2144 llvm::DenseSet<const Expr *> EmittedFinals; 2145 emitAlignedClause(*this, S); 2146 EmitOMPLinearClauseInit(S); 2147 // Emit helper vars inits. 2148 LValue LB = 2149 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); 2150 LValue UB = 2151 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); 2152 LValue ST = 2153 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2154 LValue IL = 2155 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2156 2157 // Emit 'then' code. 2158 { 2159 OMPPrivateScope LoopScope(*this); 2160 if (EmitOMPFirstprivateClause(S, LoopScope)) { 2161 // Emit implicit barrier to synchronize threads and avoid data races on 2162 // initialization of firstprivate variables and post-update of 2163 // lastprivate variables. 2164 CGM.getOpenMPRuntime().emitBarrierCall( 2165 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2166 /*ForceSimpleCall=*/true); 2167 } 2168 EmitOMPPrivateClause(S, LoopScope); 2169 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2170 EmitOMPReductionClauseInit(S, LoopScope); 2171 EmitOMPPrivateLoopCounters(S, LoopScope); 2172 EmitOMPLinearClause(S, LoopScope); 2173 (void)LoopScope.Privatize(); 2174 2175 // Detect the loop schedule kind and chunk. 2176 llvm::Value *Chunk = nullptr; 2177 OpenMPScheduleTy ScheduleKind; 2178 if (auto *C = S.getSingleClause<OMPScheduleClause>()) { 2179 ScheduleKind.Schedule = C->getScheduleKind(); 2180 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2181 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2182 if (const auto *Ch = C->getChunkSize()) { 2183 Chunk = EmitScalarExpr(Ch); 2184 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2185 S.getIterationVariable()->getType(), 2186 S.getLocStart()); 2187 } 2188 } 2189 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2190 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2191 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2192 // If the static schedule kind is specified or if the ordered clause is 2193 // specified, and if no monotonic modifier is specified, the effect will 2194 // be as if the monotonic modifier was specified. 2195 if (RT.isStaticNonchunked(ScheduleKind.Schedule, 2196 /* Chunked */ Chunk != nullptr) && 2197 !Ordered) { 2198 if (isOpenMPSimdDirective(S.getDirectiveKind())) 2199 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2200 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2201 // When no chunk_size is specified, the iteration space is divided into 2202 // chunks that are approximately equal in size, and at most one chunk is 2203 // distributed to each thread. Note that the size of the chunks is 2204 // unspecified in this case. 2205 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, 2206 IVSize, IVSigned, Ordered, 2207 IL.getAddress(), LB.getAddress(), 2208 UB.getAddress(), ST.getAddress()); 2209 auto LoopExit = 2210 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2211 // UB = min(UB, GlobalUB); 2212 EmitIgnoredExpr(S.getEnsureUpperBound()); 2213 // IV = LB; 2214 EmitIgnoredExpr(S.getInit()); 2215 // while (idx <= UB) { BODY; ++idx; } 2216 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 2217 S.getInc(), 2218 [&S, LoopExit](CodeGenFunction &CGF) { 2219 CGF.EmitOMPLoopBody(S, LoopExit); 2220 CGF.EmitStopPoint(&S); 2221 }, 2222 [](CodeGenFunction &) {}); 2223 EmitBlock(LoopExit.getBlock()); 2224 // Tell the runtime we are done. 2225 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2226 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 2227 }; 2228 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2229 } else { 2230 const bool IsMonotonic = 2231 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || 2232 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || 2233 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 2234 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 2235 // Emit the outer loop, which requests its work chunk [LB..UB] from 2236 // runtime and runs the inner loop to process it. 2237 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 2238 LB.getAddress(), UB.getAddress(), ST.getAddress(), 2239 IL.getAddress(), Chunk); 2240 } 2241 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2242 EmitOMPSimdFinal(S, 2243 [&](CodeGenFunction &CGF) -> llvm::Value * { 2244 return CGF.Builder.CreateIsNotNull( 2245 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2246 }); 2247 } 2248 EmitOMPReductionClauseFinal( 2249 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 2250 ? /*Parallel and Simd*/ OMPD_parallel_for_simd 2251 : /*Parallel only*/ OMPD_parallel); 2252 // Emit post-update of the reduction variables if IsLastIter != 0. 2253 emitPostUpdateForReductionClause( 2254 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2255 return CGF.Builder.CreateIsNotNull( 2256 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2257 }); 2258 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2259 if (HasLastprivateClause) 2260 EmitOMPLastprivateClauseFinal( 2261 S, isOpenMPSimdDirective(S.getDirectiveKind()), 2262 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 2263 } 2264 EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2265 return CGF.Builder.CreateIsNotNull( 2266 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2267 }); 2268 // We're now done with the loop, so jump to the continuation block. 2269 if (ContBlock) { 2270 EmitBranch(ContBlock); 2271 EmitBlock(ContBlock, true); 2272 } 2273 } 2274 return HasLastprivateClause; 2275 } 2276 2277 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 2278 bool HasLastprivates = false; 2279 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2280 PrePostActionTy &) { 2281 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 2282 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 2283 }; 2284 { 2285 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2286 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 2287 S.hasCancel()); 2288 } 2289 2290 // Emit an implicit barrier at the end. 2291 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2292 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2293 } 2294 } 2295 2296 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 2297 bool HasLastprivates = false; 2298 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2299 PrePostActionTy &) { 2300 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 2301 }; 2302 { 2303 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2304 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2305 } 2306 2307 // Emit an implicit barrier at the end. 2308 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2309 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2310 } 2311 } 2312 2313 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 2314 const Twine &Name, 2315 llvm::Value *Init = nullptr) { 2316 auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 2317 if (Init) 2318 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); 2319 return LVal; 2320 } 2321 2322 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 2323 auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); 2324 auto *CS = dyn_cast<CompoundStmt>(Stmt); 2325 bool HasLastprivates = false; 2326 auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF, 2327 PrePostActionTy &) { 2328 auto &C = CGF.CGM.getContext(); 2329 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2330 // Emit helper vars inits. 2331 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 2332 CGF.Builder.getInt32(0)); 2333 auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1) 2334 : CGF.Builder.getInt32(0); 2335 LValue UB = 2336 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 2337 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 2338 CGF.Builder.getInt32(1)); 2339 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 2340 CGF.Builder.getInt32(0)); 2341 // Loop counter. 2342 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 2343 OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2344 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 2345 OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2346 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 2347 // Generate condition for loop. 2348 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 2349 OK_Ordinary, S.getLocStart(), 2350 /*fpContractable=*/false); 2351 // Increment for loop counter. 2352 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 2353 S.getLocStart()); 2354 auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) { 2355 // Iterate through all sections and emit a switch construct: 2356 // switch (IV) { 2357 // case 0: 2358 // <SectionStmt[0]>; 2359 // break; 2360 // ... 2361 // case <NumSection> - 1: 2362 // <SectionStmt[<NumSection> - 1]>; 2363 // break; 2364 // } 2365 // .omp.sections.exit: 2366 auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 2367 auto *SwitchStmt = CGF.Builder.CreateSwitch( 2368 CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, 2369 CS == nullptr ? 1 : CS->size()); 2370 if (CS) { 2371 unsigned CaseNumber = 0; 2372 for (auto *SubStmt : CS->children()) { 2373 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2374 CGF.EmitBlock(CaseBB); 2375 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 2376 CGF.EmitStmt(SubStmt); 2377 CGF.EmitBranch(ExitBB); 2378 ++CaseNumber; 2379 } 2380 } else { 2381 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2382 CGF.EmitBlock(CaseBB); 2383 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 2384 CGF.EmitStmt(Stmt); 2385 CGF.EmitBranch(ExitBB); 2386 } 2387 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 2388 }; 2389 2390 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2391 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 2392 // Emit implicit barrier to synchronize threads and avoid data races on 2393 // initialization of firstprivate variables and post-update of lastprivate 2394 // variables. 2395 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 2396 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2397 /*ForceSimpleCall=*/true); 2398 } 2399 CGF.EmitOMPPrivateClause(S, LoopScope); 2400 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2401 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2402 (void)LoopScope.Privatize(); 2403 2404 // Emit static non-chunked loop. 2405 OpenMPScheduleTy ScheduleKind; 2406 ScheduleKind.Schedule = OMPC_SCHEDULE_static; 2407 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 2408 CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32, 2409 /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(), 2410 UB.getAddress(), ST.getAddress()); 2411 // UB = min(UB, GlobalUB); 2412 auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); 2413 auto *MinUBGlobalUB = CGF.Builder.CreateSelect( 2414 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 2415 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 2416 // IV = LB; 2417 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); 2418 // while (idx <= UB) { BODY; ++idx; } 2419 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, 2420 [](CodeGenFunction &) {}); 2421 // Tell the runtime we are done. 2422 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2423 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 2424 }; 2425 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); 2426 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 2427 // Emit post-update of the reduction variables if IsLastIter != 0. 2428 emitPostUpdateForReductionClause( 2429 CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2430 return CGF.Builder.CreateIsNotNull( 2431 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2432 }); 2433 2434 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2435 if (HasLastprivates) 2436 CGF.EmitOMPLastprivateClauseFinal( 2437 S, /*NoFinals=*/false, 2438 CGF.Builder.CreateIsNotNull( 2439 CGF.EmitLoadOfScalar(IL, S.getLocStart()))); 2440 }; 2441 2442 bool HasCancel = false; 2443 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 2444 HasCancel = OSD->hasCancel(); 2445 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 2446 HasCancel = OPSD->hasCancel(); 2447 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); 2448 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 2449 HasCancel); 2450 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 2451 // clause. Otherwise the barrier will be generated by the codegen for the 2452 // directive. 2453 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 2454 // Emit implicit barrier to synchronize threads and avoid data races on 2455 // initialization of firstprivate variables. 2456 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2457 OMPD_unknown); 2458 } 2459 } 2460 2461 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 2462 { 2463 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2464 EmitSections(S); 2465 } 2466 // Emit an implicit barrier at the end. 2467 if (!S.getSingleClause<OMPNowaitClause>()) { 2468 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2469 OMPD_sections); 2470 } 2471 } 2472 2473 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 2474 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2475 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2476 }; 2477 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2478 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 2479 S.hasCancel()); 2480 } 2481 2482 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 2483 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 2484 llvm::SmallVector<const Expr *, 8> DestExprs; 2485 llvm::SmallVector<const Expr *, 8> SrcExprs; 2486 llvm::SmallVector<const Expr *, 8> AssignmentOps; 2487 // Check if there are any 'copyprivate' clauses associated with this 2488 // 'single' construct. 2489 // Build a list of copyprivate variables along with helper expressions 2490 // (<source>, <destination>, <destination>=<source> expressions) 2491 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 2492 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 2493 DestExprs.append(C->destination_exprs().begin(), 2494 C->destination_exprs().end()); 2495 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 2496 AssignmentOps.append(C->assignment_ops().begin(), 2497 C->assignment_ops().end()); 2498 } 2499 // Emit code for 'single' region along with 'copyprivate' clauses 2500 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2501 Action.Enter(CGF); 2502 OMPPrivateScope SingleScope(CGF); 2503 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 2504 CGF.EmitOMPPrivateClause(S, SingleScope); 2505 (void)SingleScope.Privatize(); 2506 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2507 }; 2508 { 2509 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2510 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 2511 CopyprivateVars, DestExprs, 2512 SrcExprs, AssignmentOps); 2513 } 2514 // Emit an implicit barrier at the end (to avoid data race on firstprivate 2515 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 2516 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 2517 CGM.getOpenMPRuntime().emitBarrierCall( 2518 *this, S.getLocStart(), 2519 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); 2520 } 2521 } 2522 2523 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 2524 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2525 Action.Enter(CGF); 2526 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2527 }; 2528 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2529 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 2530 } 2531 2532 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 2533 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2534 Action.Enter(CGF); 2535 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2536 }; 2537 Expr *Hint = nullptr; 2538 if (auto *HintClause = S.getSingleClause<OMPHintClause>()) 2539 Hint = HintClause->getHint(); 2540 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2541 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 2542 S.getDirectiveName().getAsString(), 2543 CodeGen, S.getLocStart(), Hint); 2544 } 2545 2546 void CodeGenFunction::EmitOMPParallelForDirective( 2547 const OMPParallelForDirective &S) { 2548 // Emit directive as a combined directive that consists of two implicit 2549 // directives: 'parallel' with 'for' directive. 2550 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2551 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); 2552 CGF.EmitOMPWorksharingLoop(S); 2553 }; 2554 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); 2555 } 2556 2557 void CodeGenFunction::EmitOMPParallelForSimdDirective( 2558 const OMPParallelForSimdDirective &S) { 2559 // Emit directive as a combined directive that consists of two implicit 2560 // directives: 'parallel' with 'for' directive. 2561 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2562 CGF.EmitOMPWorksharingLoop(S); 2563 }; 2564 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); 2565 } 2566 2567 void CodeGenFunction::EmitOMPParallelSectionsDirective( 2568 const OMPParallelSectionsDirective &S) { 2569 // Emit directive as a combined directive that consists of two implicit 2570 // directives: 'parallel' with 'sections' directive. 2571 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2572 CGF.EmitSections(S); 2573 }; 2574 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); 2575 } 2576 2577 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, 2578 const RegionCodeGenTy &BodyGen, 2579 const TaskGenTy &TaskGen, 2580 OMPTaskDataTy &Data) { 2581 // Emit outlined function for task construct. 2582 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2583 auto *I = CS->getCapturedDecl()->param_begin(); 2584 auto *PartId = std::next(I); 2585 auto *TaskT = std::next(I, 4); 2586 // Check if the task is final 2587 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 2588 // If the condition constant folds and can be elided, try to avoid emitting 2589 // the condition and the dead arm of the if/else. 2590 auto *Cond = Clause->getCondition(); 2591 bool CondConstant; 2592 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 2593 Data.Final.setInt(CondConstant); 2594 else 2595 Data.Final.setPointer(EvaluateExprAsBool(Cond)); 2596 } else { 2597 // By default the task is not final. 2598 Data.Final.setInt(/*IntVal=*/false); 2599 } 2600 // Check if the task has 'priority' clause. 2601 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 2602 auto *Prio = Clause->getPriority(); 2603 Data.Priority.setInt(/*IntVal=*/true); 2604 Data.Priority.setPointer(EmitScalarConversion( 2605 EmitScalarExpr(Prio), Prio->getType(), 2606 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 2607 Prio->getExprLoc())); 2608 } 2609 // The first function argument for tasks is a thread id, the second one is a 2610 // part id (0 for tied tasks, >=0 for untied task). 2611 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 2612 // Get list of private variables. 2613 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 2614 auto IRef = C->varlist_begin(); 2615 for (auto *IInit : C->private_copies()) { 2616 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2617 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2618 Data.PrivateVars.push_back(*IRef); 2619 Data.PrivateCopies.push_back(IInit); 2620 } 2621 ++IRef; 2622 } 2623 } 2624 EmittedAsPrivate.clear(); 2625 // Get list of firstprivate variables. 2626 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 2627 auto IRef = C->varlist_begin(); 2628 auto IElemInitRef = C->inits().begin(); 2629 for (auto *IInit : C->private_copies()) { 2630 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2631 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2632 Data.FirstprivateVars.push_back(*IRef); 2633 Data.FirstprivateCopies.push_back(IInit); 2634 Data.FirstprivateInits.push_back(*IElemInitRef); 2635 } 2636 ++IRef; 2637 ++IElemInitRef; 2638 } 2639 } 2640 // Get list of lastprivate variables (for taskloops). 2641 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 2642 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 2643 auto IRef = C->varlist_begin(); 2644 auto ID = C->destination_exprs().begin(); 2645 for (auto *IInit : C->private_copies()) { 2646 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2647 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2648 Data.LastprivateVars.push_back(*IRef); 2649 Data.LastprivateCopies.push_back(IInit); 2650 } 2651 LastprivateDstsOrigs.insert( 2652 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 2653 cast<DeclRefExpr>(*IRef)}); 2654 ++IRef; 2655 ++ID; 2656 } 2657 } 2658 // Build list of dependences. 2659 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) 2660 for (auto *IRef : C->varlists()) 2661 Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); 2662 auto &&CodeGen = [&Data, CS, &BodyGen, &LastprivateDstsOrigs]( 2663 CodeGenFunction &CGF, PrePostActionTy &Action) { 2664 // Set proper addresses for generated private copies. 2665 OMPPrivateScope Scope(CGF); 2666 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || 2667 !Data.LastprivateVars.empty()) { 2668 auto *CopyFn = CGF.Builder.CreateLoad( 2669 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); 2670 auto *PrivatesPtr = CGF.Builder.CreateLoad( 2671 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); 2672 // Map privates. 2673 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 2674 llvm::SmallVector<llvm::Value *, 16> CallArgs; 2675 CallArgs.push_back(PrivatesPtr); 2676 for (auto *E : Data.PrivateVars) { 2677 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2678 Address PrivatePtr = CGF.CreateMemTemp( 2679 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); 2680 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2681 CallArgs.push_back(PrivatePtr.getPointer()); 2682 } 2683 for (auto *E : Data.FirstprivateVars) { 2684 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2685 Address PrivatePtr = 2686 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 2687 ".firstpriv.ptr.addr"); 2688 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2689 CallArgs.push_back(PrivatePtr.getPointer()); 2690 } 2691 for (auto *E : Data.LastprivateVars) { 2692 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2693 Address PrivatePtr = 2694 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 2695 ".lastpriv.ptr.addr"); 2696 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2697 CallArgs.push_back(PrivatePtr.getPointer()); 2698 } 2699 CGF.EmitRuntimeCall(CopyFn, CallArgs); 2700 for (auto &&Pair : LastprivateDstsOrigs) { 2701 auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); 2702 DeclRefExpr DRE( 2703 const_cast<VarDecl *>(OrigVD), 2704 /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup( 2705 OrigVD) != nullptr, 2706 Pair.second->getType(), VK_LValue, Pair.second->getExprLoc()); 2707 Scope.addPrivate(Pair.first, [&CGF, &DRE]() { 2708 return CGF.EmitLValue(&DRE).getAddress(); 2709 }); 2710 } 2711 for (auto &&Pair : PrivatePtrs) { 2712 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 2713 CGF.getContext().getDeclAlign(Pair.first)); 2714 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 2715 } 2716 } 2717 (void)Scope.Privatize(); 2718 2719 Action.Enter(CGF); 2720 BodyGen(CGF); 2721 }; 2722 auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 2723 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, 2724 Data.NumberOfParts); 2725 OMPLexicalScope Scope(*this, S); 2726 TaskGen(*this, OutlinedFn, Data); 2727 } 2728 2729 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 2730 // Emit outlined function for task construct. 2731 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2732 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 2733 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 2734 const Expr *IfCond = nullptr; 2735 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2736 if (C->getNameModifier() == OMPD_unknown || 2737 C->getNameModifier() == OMPD_task) { 2738 IfCond = C->getCondition(); 2739 break; 2740 } 2741 } 2742 2743 OMPTaskDataTy Data; 2744 // Check if we should emit tied or untied task. 2745 Data.Tied = !S.getSingleClause<OMPUntiedClause>(); 2746 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 2747 CGF.EmitStmt(CS->getCapturedStmt()); 2748 }; 2749 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 2750 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 2751 const OMPTaskDataTy &Data) { 2752 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn, 2753 SharedsTy, CapturedStruct, IfCond, 2754 Data); 2755 }; 2756 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 2757 } 2758 2759 void CodeGenFunction::EmitOMPTaskyieldDirective( 2760 const OMPTaskyieldDirective &S) { 2761 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); 2762 } 2763 2764 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 2765 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); 2766 } 2767 2768 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 2769 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart()); 2770 } 2771 2772 void CodeGenFunction::EmitOMPTaskgroupDirective( 2773 const OMPTaskgroupDirective &S) { 2774 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2775 Action.Enter(CGF); 2776 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2777 }; 2778 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2779 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); 2780 } 2781 2782 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 2783 CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { 2784 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) { 2785 return llvm::makeArrayRef(FlushClause->varlist_begin(), 2786 FlushClause->varlist_end()); 2787 } 2788 return llvm::None; 2789 }(), S.getLocStart()); 2790 } 2791 2792 void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { 2793 // Emit the loop iteration variable. 2794 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2795 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2796 EmitVarDecl(*IVDecl); 2797 2798 // Emit the iterations count variable. 2799 // If it is not a variable, Sema decided to calculate iterations count on each 2800 // iteration (e.g., it is foldable into a constant). 2801 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2802 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2803 // Emit calculation of the iterations count. 2804 EmitIgnoredExpr(S.getCalcLastIteration()); 2805 } 2806 2807 auto &RT = CGM.getOpenMPRuntime(); 2808 2809 bool HasLastprivateClause = false; 2810 // Check pre-condition. 2811 { 2812 OMPLoopScope PreInitScope(*this, S); 2813 // Skip the entire loop if we don't meet the precondition. 2814 // If the condition constant folds and can be elided, avoid emitting the 2815 // whole loop. 2816 bool CondConstant; 2817 llvm::BasicBlock *ContBlock = nullptr; 2818 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2819 if (!CondConstant) 2820 return; 2821 } else { 2822 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2823 ContBlock = createBasicBlock("omp.precond.end"); 2824 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2825 getProfileCount(&S)); 2826 EmitBlock(ThenBlock); 2827 incrementProfileCounter(&S); 2828 } 2829 2830 // Emit 'then' code. 2831 { 2832 // Emit helper vars inits. 2833 LValue LB = 2834 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); 2835 LValue UB = 2836 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); 2837 LValue ST = 2838 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2839 LValue IL = 2840 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2841 2842 OMPPrivateScope LoopScope(*this); 2843 if (EmitOMPFirstprivateClause(S, LoopScope)) { 2844 // Emit implicit barrier to synchronize threads and avoid data races on 2845 // initialization of firstprivate variables and post-update of 2846 // lastprivate variables. 2847 CGM.getOpenMPRuntime().emitBarrierCall( 2848 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2849 /*ForceSimpleCall=*/true); 2850 } 2851 EmitOMPPrivateClause(S, LoopScope); 2852 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2853 EmitOMPPrivateLoopCounters(S, LoopScope); 2854 (void)LoopScope.Privatize(); 2855 2856 // Detect the distribute schedule kind and chunk. 2857 llvm::Value *Chunk = nullptr; 2858 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; 2859 if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) { 2860 ScheduleKind = C->getDistScheduleKind(); 2861 if (const auto *Ch = C->getChunkSize()) { 2862 Chunk = EmitScalarExpr(Ch); 2863 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2864 S.getIterationVariable()->getType(), 2865 S.getLocStart()); 2866 } 2867 } 2868 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2869 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2870 2871 // OpenMP [2.10.8, distribute Construct, Description] 2872 // If dist_schedule is specified, kind must be static. If specified, 2873 // iterations are divided into chunks of size chunk_size, chunks are 2874 // assigned to the teams of the league in a round-robin fashion in the 2875 // order of the team number. When no chunk_size is specified, the 2876 // iteration space is divided into chunks that are approximately equal 2877 // in size, and at most one chunk is distributed to each team of the 2878 // league. The size of the chunks is unspecified in this case. 2879 if (RT.isStaticNonchunked(ScheduleKind, 2880 /* Chunked */ Chunk != nullptr)) { 2881 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, 2882 IVSize, IVSigned, /* Ordered = */ false, 2883 IL.getAddress(), LB.getAddress(), 2884 UB.getAddress(), ST.getAddress()); 2885 auto LoopExit = 2886 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2887 // UB = min(UB, GlobalUB); 2888 EmitIgnoredExpr(S.getEnsureUpperBound()); 2889 // IV = LB; 2890 EmitIgnoredExpr(S.getInit()); 2891 // while (idx <= UB) { BODY; ++idx; } 2892 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 2893 S.getInc(), 2894 [&S, LoopExit](CodeGenFunction &CGF) { 2895 CGF.EmitOMPLoopBody(S, LoopExit); 2896 CGF.EmitStopPoint(&S); 2897 }, 2898 [](CodeGenFunction &) {}); 2899 EmitBlock(LoopExit.getBlock()); 2900 // Tell the runtime we are done. 2901 RT.emitForStaticFinish(*this, S.getLocStart()); 2902 } else { 2903 // Emit the outer loop, which requests its work chunk [LB..UB] from 2904 // runtime and runs the inner loop to process it. 2905 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, 2906 LB.getAddress(), UB.getAddress(), ST.getAddress(), 2907 IL.getAddress(), Chunk); 2908 } 2909 2910 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2911 if (HasLastprivateClause) 2912 EmitOMPLastprivateClauseFinal( 2913 S, /*NoFinals=*/false, 2914 Builder.CreateIsNotNull( 2915 EmitLoadOfScalar(IL, S.getLocStart()))); 2916 } 2917 2918 // We're now done with the loop, so jump to the continuation block. 2919 if (ContBlock) { 2920 EmitBranch(ContBlock); 2921 EmitBlock(ContBlock, true); 2922 } 2923 } 2924 } 2925 2926 void CodeGenFunction::EmitOMPDistributeDirective( 2927 const OMPDistributeDirective &S) { 2928 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2929 CGF.EmitOMPDistributeLoop(S); 2930 }; 2931 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2932 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 2933 false); 2934 } 2935 2936 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 2937 const CapturedStmt *S) { 2938 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 2939 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 2940 CGF.CapturedStmtInfo = &CapStmtInfo; 2941 auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); 2942 Fn->addFnAttr(llvm::Attribute::NoInline); 2943 return Fn; 2944 } 2945 2946 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 2947 if (!S.getAssociatedStmt()) { 2948 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) 2949 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); 2950 return; 2951 } 2952 auto *C = S.getSingleClause<OMPSIMDClause>(); 2953 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, 2954 PrePostActionTy &Action) { 2955 if (C) { 2956 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2957 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 2958 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 2959 auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); 2960 CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); 2961 } else { 2962 Action.Enter(CGF); 2963 CGF.EmitStmt( 2964 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2965 } 2966 }; 2967 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2968 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); 2969 } 2970 2971 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 2972 QualType SrcType, QualType DestType, 2973 SourceLocation Loc) { 2974 assert(CGF.hasScalarEvaluationKind(DestType) && 2975 "DestType must have scalar evaluation kind."); 2976 assert(!Val.isAggregate() && "Must be a scalar or complex."); 2977 return Val.isScalar() 2978 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, 2979 Loc) 2980 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 2981 DestType, Loc); 2982 } 2983 2984 static CodeGenFunction::ComplexPairTy 2985 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 2986 QualType DestType, SourceLocation Loc) { 2987 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 2988 "DestType must have complex evaluation kind."); 2989 CodeGenFunction::ComplexPairTy ComplexVal; 2990 if (Val.isScalar()) { 2991 // Convert the input element to the element type of the complex. 2992 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 2993 auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 2994 DestElementType, Loc); 2995 ComplexVal = CodeGenFunction::ComplexPairTy( 2996 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 2997 } else { 2998 assert(Val.isComplex() && "Must be a scalar or complex."); 2999 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 3000 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 3001 ComplexVal.first = CGF.EmitScalarConversion( 3002 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 3003 ComplexVal.second = CGF.EmitScalarConversion( 3004 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 3005 } 3006 return ComplexVal; 3007 } 3008 3009 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 3010 LValue LVal, RValue RVal) { 3011 if (LVal.isGlobalReg()) { 3012 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 3013 } else { 3014 CGF.EmitAtomicStore(RVal, LVal, 3015 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3016 : llvm::AtomicOrdering::Monotonic, 3017 LVal.isVolatile(), /*IsInit=*/false); 3018 } 3019 } 3020 3021 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, 3022 QualType RValTy, SourceLocation Loc) { 3023 switch (getEvaluationKind(LVal.getType())) { 3024 case TEK_Scalar: 3025 EmitStoreThroughLValue(RValue::get(convertToScalarValue( 3026 *this, RVal, RValTy, LVal.getType(), Loc)), 3027 LVal); 3028 break; 3029 case TEK_Complex: 3030 EmitStoreOfComplex( 3031 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, 3032 /*isInit=*/false); 3033 break; 3034 case TEK_Aggregate: 3035 llvm_unreachable("Must be a scalar or complex."); 3036 } 3037 } 3038 3039 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 3040 const Expr *X, const Expr *V, 3041 SourceLocation Loc) { 3042 // v = x; 3043 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 3044 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 3045 LValue XLValue = CGF.EmitLValue(X); 3046 LValue VLValue = CGF.EmitLValue(V); 3047 RValue Res = XLValue.isGlobalReg() 3048 ? CGF.EmitLoadOfLValue(XLValue, Loc) 3049 : CGF.EmitAtomicLoad( 3050 XLValue, Loc, 3051 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3052 : llvm::AtomicOrdering::Monotonic, 3053 XLValue.isVolatile()); 3054 // OpenMP, 2.12.6, atomic Construct 3055 // Any atomic construct with a seq_cst clause forces the atomically 3056 // performed operation to include an implicit flush operation without a 3057 // list. 3058 if (IsSeqCst) 3059 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3060 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); 3061 } 3062 3063 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 3064 const Expr *X, const Expr *E, 3065 SourceLocation Loc) { 3066 // x = expr; 3067 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 3068 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); 3069 // OpenMP, 2.12.6, atomic Construct 3070 // Any atomic construct with a seq_cst clause forces the atomically 3071 // performed operation to include an implicit flush operation without a 3072 // list. 3073 if (IsSeqCst) 3074 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3075 } 3076 3077 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, 3078 RValue Update, 3079 BinaryOperatorKind BO, 3080 llvm::AtomicOrdering AO, 3081 bool IsXLHSInRHSPart) { 3082 auto &Context = CGF.CGM.getContext(); 3083 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' 3084 // expression is simple and atomic is allowed for the given type for the 3085 // target platform. 3086 if (BO == BO_Comma || !Update.isScalar() || 3087 !Update.getScalarVal()->getType()->isIntegerTy() || 3088 !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) && 3089 (Update.getScalarVal()->getType() != 3090 X.getAddress().getElementType())) || 3091 !X.getAddress().getElementType()->isIntegerTy() || 3092 !Context.getTargetInfo().hasBuiltinAtomic( 3093 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) 3094 return std::make_pair(false, RValue::get(nullptr)); 3095 3096 llvm::AtomicRMWInst::BinOp RMWOp; 3097 switch (BO) { 3098 case BO_Add: 3099 RMWOp = llvm::AtomicRMWInst::Add; 3100 break; 3101 case BO_Sub: 3102 if (!IsXLHSInRHSPart) 3103 return std::make_pair(false, RValue::get(nullptr)); 3104 RMWOp = llvm::AtomicRMWInst::Sub; 3105 break; 3106 case BO_And: 3107 RMWOp = llvm::AtomicRMWInst::And; 3108 break; 3109 case BO_Or: 3110 RMWOp = llvm::AtomicRMWInst::Or; 3111 break; 3112 case BO_Xor: 3113 RMWOp = llvm::AtomicRMWInst::Xor; 3114 break; 3115 case BO_LT: 3116 RMWOp = X.getType()->hasSignedIntegerRepresentation() 3117 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min 3118 : llvm::AtomicRMWInst::Max) 3119 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin 3120 : llvm::AtomicRMWInst::UMax); 3121 break; 3122 case BO_GT: 3123 RMWOp = X.getType()->hasSignedIntegerRepresentation() 3124 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max 3125 : llvm::AtomicRMWInst::Min) 3126 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax 3127 : llvm::AtomicRMWInst::UMin); 3128 break; 3129 case BO_Assign: 3130 RMWOp = llvm::AtomicRMWInst::Xchg; 3131 break; 3132 case BO_Mul: 3133 case BO_Div: 3134 case BO_Rem: 3135 case BO_Shl: 3136 case BO_Shr: 3137 case BO_LAnd: 3138 case BO_LOr: 3139 return std::make_pair(false, RValue::get(nullptr)); 3140 case BO_PtrMemD: 3141 case BO_PtrMemI: 3142 case BO_LE: 3143 case BO_GE: 3144 case BO_EQ: 3145 case BO_NE: 3146 case BO_AddAssign: 3147 case BO_SubAssign: 3148 case BO_AndAssign: 3149 case BO_OrAssign: 3150 case BO_XorAssign: 3151 case BO_MulAssign: 3152 case BO_DivAssign: 3153 case BO_RemAssign: 3154 case BO_ShlAssign: 3155 case BO_ShrAssign: 3156 case BO_Comma: 3157 llvm_unreachable("Unsupported atomic update operation"); 3158 } 3159 auto *UpdateVal = Update.getScalarVal(); 3160 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) { 3161 UpdateVal = CGF.Builder.CreateIntCast( 3162 IC, X.getAddress().getElementType(), 3163 X.getType()->hasSignedIntegerRepresentation()); 3164 } 3165 auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO); 3166 return std::make_pair(true, RValue::get(Res)); 3167 } 3168 3169 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( 3170 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, 3171 llvm::AtomicOrdering AO, SourceLocation Loc, 3172 const llvm::function_ref<RValue(RValue)> &CommonGen) { 3173 // Update expressions are allowed to have the following forms: 3174 // x binop= expr; -> xrval + expr; 3175 // x++, ++x -> xrval + 1; 3176 // x--, --x -> xrval - 1; 3177 // x = x binop expr; -> xrval binop expr 3178 // x = expr Op x; - > expr binop xrval; 3179 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart); 3180 if (!Res.first) { 3181 if (X.isGlobalReg()) { 3182 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop 3183 // 'xrval'. 3184 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X); 3185 } else { 3186 // Perform compare-and-swap procedure. 3187 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified()); 3188 } 3189 } 3190 return Res; 3191 } 3192 3193 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, 3194 const Expr *X, const Expr *E, 3195 const Expr *UE, bool IsXLHSInRHSPart, 3196 SourceLocation Loc) { 3197 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 3198 "Update expr in 'atomic update' must be a binary operator."); 3199 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 3200 // Update expressions are allowed to have the following forms: 3201 // x binop= expr; -> xrval + expr; 3202 // x++, ++x -> xrval + 1; 3203 // x--, --x -> xrval - 1; 3204 // x = x binop expr; -> xrval binop expr 3205 // x = expr Op x; - > expr binop xrval; 3206 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); 3207 LValue XLValue = CGF.EmitLValue(X); 3208 RValue ExprRValue = CGF.EmitAnyExpr(E); 3209 auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3210 : llvm::AtomicOrdering::Monotonic; 3211 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 3212 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 3213 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 3214 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 3215 auto Gen = 3216 [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue { 3217 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3218 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 3219 return CGF.EmitAnyExpr(UE); 3220 }; 3221 (void)CGF.EmitOMPAtomicSimpleUpdateExpr( 3222 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 3223 // OpenMP, 2.12.6, atomic Construct 3224 // Any atomic construct with a seq_cst clause forces the atomically 3225 // performed operation to include an implicit flush operation without a 3226 // list. 3227 if (IsSeqCst) 3228 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3229 } 3230 3231 static RValue convertToType(CodeGenFunction &CGF, RValue Value, 3232 QualType SourceType, QualType ResType, 3233 SourceLocation Loc) { 3234 switch (CGF.getEvaluationKind(ResType)) { 3235 case TEK_Scalar: 3236 return RValue::get( 3237 convertToScalarValue(CGF, Value, SourceType, ResType, Loc)); 3238 case TEK_Complex: { 3239 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc); 3240 return RValue::getComplex(Res.first, Res.second); 3241 } 3242 case TEK_Aggregate: 3243 break; 3244 } 3245 llvm_unreachable("Must be a scalar or complex."); 3246 } 3247 3248 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, 3249 bool IsPostfixUpdate, const Expr *V, 3250 const Expr *X, const Expr *E, 3251 const Expr *UE, bool IsXLHSInRHSPart, 3252 SourceLocation Loc) { 3253 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue"); 3254 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue"); 3255 RValue NewVVal; 3256 LValue VLValue = CGF.EmitLValue(V); 3257 LValue XLValue = CGF.EmitLValue(X); 3258 RValue ExprRValue = CGF.EmitAnyExpr(E); 3259 auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3260 : llvm::AtomicOrdering::Monotonic; 3261 QualType NewVValType; 3262 if (UE) { 3263 // 'x' is updated with some additional value. 3264 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 3265 "Update expr in 'atomic capture' must be a binary operator."); 3266 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 3267 // Update expressions are allowed to have the following forms: 3268 // x binop= expr; -> xrval + expr; 3269 // x++, ++x -> xrval + 1; 3270 // x--, --x -> xrval - 1; 3271 // x = x binop expr; -> xrval binop expr 3272 // x = expr Op x; - > expr binop xrval; 3273 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 3274 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 3275 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 3276 NewVValType = XRValExpr->getType(); 3277 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 3278 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, 3279 IsPostfixUpdate](RValue XRValue) -> RValue { 3280 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3281 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 3282 RValue Res = CGF.EmitAnyExpr(UE); 3283 NewVVal = IsPostfixUpdate ? XRValue : Res; 3284 return Res; 3285 }; 3286 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3287 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 3288 if (Res.first) { 3289 // 'atomicrmw' instruction was generated. 3290 if (IsPostfixUpdate) { 3291 // Use old value from 'atomicrmw'. 3292 NewVVal = Res.second; 3293 } else { 3294 // 'atomicrmw' does not provide new value, so evaluate it using old 3295 // value of 'x'. 3296 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3297 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 3298 NewVVal = CGF.EmitAnyExpr(UE); 3299 } 3300 } 3301 } else { 3302 // 'x' is simply rewritten with some 'expr'. 3303 NewVValType = X->getType().getNonReferenceType(); 3304 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 3305 X->getType().getNonReferenceType(), Loc); 3306 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue { 3307 NewVVal = XRValue; 3308 return ExprRValue; 3309 }; 3310 // Try to perform atomicrmw xchg, otherwise simple exchange. 3311 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3312 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 3313 Loc, Gen); 3314 if (Res.first) { 3315 // 'atomicrmw' instruction was generated. 3316 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 3317 } 3318 } 3319 // Emit post-update store to 'v' of old/new 'x' value. 3320 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); 3321 // OpenMP, 2.12.6, atomic Construct 3322 // Any atomic construct with a seq_cst clause forces the atomically 3323 // performed operation to include an implicit flush operation without a 3324 // list. 3325 if (IsSeqCst) 3326 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3327 } 3328 3329 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 3330 bool IsSeqCst, bool IsPostfixUpdate, 3331 const Expr *X, const Expr *V, const Expr *E, 3332 const Expr *UE, bool IsXLHSInRHSPart, 3333 SourceLocation Loc) { 3334 switch (Kind) { 3335 case OMPC_read: 3336 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 3337 break; 3338 case OMPC_write: 3339 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 3340 break; 3341 case OMPC_unknown: 3342 case OMPC_update: 3343 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 3344 break; 3345 case OMPC_capture: 3346 EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 3347 IsXLHSInRHSPart, Loc); 3348 break; 3349 case OMPC_if: 3350 case OMPC_final: 3351 case OMPC_num_threads: 3352 case OMPC_private: 3353 case OMPC_firstprivate: 3354 case OMPC_lastprivate: 3355 case OMPC_reduction: 3356 case OMPC_safelen: 3357 case OMPC_simdlen: 3358 case OMPC_collapse: 3359 case OMPC_default: 3360 case OMPC_seq_cst: 3361 case OMPC_shared: 3362 case OMPC_linear: 3363 case OMPC_aligned: 3364 case OMPC_copyin: 3365 case OMPC_copyprivate: 3366 case OMPC_flush: 3367 case OMPC_proc_bind: 3368 case OMPC_schedule: 3369 case OMPC_ordered: 3370 case OMPC_nowait: 3371 case OMPC_untied: 3372 case OMPC_threadprivate: 3373 case OMPC_depend: 3374 case OMPC_mergeable: 3375 case OMPC_device: 3376 case OMPC_threads: 3377 case OMPC_simd: 3378 case OMPC_map: 3379 case OMPC_num_teams: 3380 case OMPC_thread_limit: 3381 case OMPC_priority: 3382 case OMPC_grainsize: 3383 case OMPC_nogroup: 3384 case OMPC_num_tasks: 3385 case OMPC_hint: 3386 case OMPC_dist_schedule: 3387 case OMPC_defaultmap: 3388 case OMPC_uniform: 3389 case OMPC_to: 3390 case OMPC_from: 3391 case OMPC_use_device_ptr: 3392 case OMPC_is_device_ptr: 3393 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 3394 } 3395 } 3396 3397 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 3398 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); 3399 OpenMPClauseKind Kind = OMPC_unknown; 3400 for (auto *C : S.clauses()) { 3401 // Find first clause (skip seq_cst clause, if it is first). 3402 if (C->getClauseKind() != OMPC_seq_cst) { 3403 Kind = C->getClauseKind(); 3404 break; 3405 } 3406 } 3407 3408 const auto *CS = 3409 S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 3410 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) { 3411 enterFullExpression(EWC); 3412 } 3413 // Processing for statements under 'atomic capture'. 3414 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 3415 for (const auto *C : Compound->body()) { 3416 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) { 3417 enterFullExpression(EWC); 3418 } 3419 } 3420 } 3421 3422 auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, 3423 PrePostActionTy &) { 3424 CGF.EmitStopPoint(CS); 3425 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), 3426 S.getV(), S.getExpr(), S.getUpdateExpr(), 3427 S.isXLHSInRHSPart(), S.getLocStart()); 3428 }; 3429 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3430 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); 3431 } 3432 3433 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, 3434 const OMPExecutableDirective &S, 3435 const RegionCodeGenTy &CodeGen) { 3436 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); 3437 CodeGenModule &CGM = CGF.CGM; 3438 const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); 3439 3440 llvm::Function *Fn = nullptr; 3441 llvm::Constant *FnID = nullptr; 3442 3443 const Expr *IfCond = nullptr; 3444 // Check for the at most one if clause associated with the target region. 3445 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3446 if (C->getNameModifier() == OMPD_unknown || 3447 C->getNameModifier() == OMPD_target) { 3448 IfCond = C->getCondition(); 3449 break; 3450 } 3451 } 3452 3453 // Check if we have any device clause associated with the directive. 3454 const Expr *Device = nullptr; 3455 if (auto *C = S.getSingleClause<OMPDeviceClause>()) { 3456 Device = C->getDevice(); 3457 } 3458 3459 // Check if we have an if clause whose conditional always evaluates to false 3460 // or if we do not have any targets specified. If so the target region is not 3461 // an offload entry point. 3462 bool IsOffloadEntry = true; 3463 if (IfCond) { 3464 bool Val; 3465 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) 3466 IsOffloadEntry = false; 3467 } 3468 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3469 IsOffloadEntry = false; 3470 3471 assert(CGF.CurFuncDecl && "No parent declaration for target region!"); 3472 StringRef ParentName; 3473 // In case we have Ctors/Dtors we use the complete type variant to produce 3474 // the mangling of the device outlined kernel. 3475 if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) 3476 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); 3477 else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) 3478 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); 3479 else 3480 ParentName = 3481 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl))); 3482 3483 // Emit target region as a standalone region. 3484 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, 3485 IsOffloadEntry, CodeGen); 3486 OMPLexicalScope Scope(CGF, S); 3487 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3488 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 3489 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, 3490 CapturedVars); 3491 } 3492 3493 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, 3494 PrePostActionTy &Action) { 3495 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 3496 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3497 CGF.EmitOMPPrivateClause(S, PrivateScope); 3498 (void)PrivateScope.Privatize(); 3499 3500 Action.Enter(CGF); 3501 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3502 } 3503 3504 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, 3505 StringRef ParentName, 3506 const OMPTargetDirective &S) { 3507 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3508 emitTargetRegion(CGF, S, Action); 3509 }; 3510 llvm::Function *Fn; 3511 llvm::Constant *Addr; 3512 // Emit target region as a standalone region. 3513 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3514 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3515 assert(Fn && Addr && "Target device function emission failed."); 3516 } 3517 3518 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 3519 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3520 emitTargetRegion(CGF, S, Action); 3521 }; 3522 emitCommonOMPTargetDirective(*this, S, CodeGen); 3523 } 3524 3525 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, 3526 const OMPExecutableDirective &S, 3527 OpenMPDirectiveKind InnermostKind, 3528 const RegionCodeGenTy &CodeGen) { 3529 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); 3530 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( 3531 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 3532 3533 const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>(); 3534 const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>(); 3535 if (NT || TL) { 3536 Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; 3537 Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; 3538 3539 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, 3540 S.getLocStart()); 3541 } 3542 3543 OMPTeamsScope Scope(CGF, S); 3544 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3545 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3546 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, 3547 CapturedVars); 3548 } 3549 3550 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { 3551 // Emit teams region as a standalone region. 3552 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3553 OMPPrivateScope PrivateScope(CGF); 3554 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3555 CGF.EmitOMPPrivateClause(S, PrivateScope); 3556 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 3557 (void)PrivateScope.Privatize(); 3558 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3559 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 3560 }; 3561 emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); 3562 emitPostUpdateForReductionClause( 3563 *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 3564 } 3565 3566 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, 3567 const OMPTargetTeamsDirective &S) { 3568 auto *CS = S.getCapturedStmt(OMPD_teams); 3569 Action.Enter(CGF); 3570 auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 3571 // TODO: Add support for clauses. 3572 CGF.EmitStmt(CS->getCapturedStmt()); 3573 }; 3574 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); 3575 } 3576 3577 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 3578 CodeGenModule &CGM, StringRef ParentName, 3579 const OMPTargetTeamsDirective &S) { 3580 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3581 emitTargetTeamsRegion(CGF, Action, S); 3582 }; 3583 llvm::Function *Fn; 3584 llvm::Constant *Addr; 3585 // Emit target region as a standalone region. 3586 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3587 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3588 assert(Fn && Addr && "Target device function emission failed."); 3589 } 3590 3591 void CodeGenFunction::EmitOMPTargetTeamsDirective( 3592 const OMPTargetTeamsDirective &S) { 3593 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3594 emitTargetTeamsRegion(CGF, Action, S); 3595 }; 3596 emitCommonOMPTargetDirective(*this, S, CodeGen); 3597 } 3598 3599 void CodeGenFunction::EmitOMPCancellationPointDirective( 3600 const OMPCancellationPointDirective &S) { 3601 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), 3602 S.getCancelRegion()); 3603 } 3604 3605 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 3606 const Expr *IfCond = nullptr; 3607 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3608 if (C->getNameModifier() == OMPD_unknown || 3609 C->getNameModifier() == OMPD_cancel) { 3610 IfCond = C->getCondition(); 3611 break; 3612 } 3613 } 3614 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, 3615 S.getCancelRegion()); 3616 } 3617 3618 CodeGenFunction::JumpDest 3619 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 3620 if (Kind == OMPD_parallel || Kind == OMPD_task || 3621 Kind == OMPD_target_parallel) 3622 return ReturnBlock; 3623 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 3624 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || 3625 Kind == OMPD_distribute_parallel_for || 3626 Kind == OMPD_target_parallel_for); 3627 return OMPCancelStack.getExitBlock(); 3628 } 3629 3630 void CodeGenFunction::EmitOMPUseDevicePtrClause( 3631 const OMPClause &NC, OMPPrivateScope &PrivateScope, 3632 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { 3633 const auto &C = cast<OMPUseDevicePtrClause>(NC); 3634 auto OrigVarIt = C.varlist_begin(); 3635 auto InitIt = C.inits().begin(); 3636 for (auto PvtVarIt : C.private_copies()) { 3637 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl()); 3638 auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl()); 3639 auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl()); 3640 3641 // In order to identify the right initializer we need to match the 3642 // declaration used by the mapping logic. In some cases we may get 3643 // OMPCapturedExprDecl that refers to the original declaration. 3644 const ValueDecl *MatchingVD = OrigVD; 3645 if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) { 3646 // OMPCapturedExprDecl are used to privative fields of the current 3647 // structure. 3648 auto *ME = cast<MemberExpr>(OED->getInit()); 3649 assert(isa<CXXThisExpr>(ME->getBase()) && 3650 "Base should be the current struct!"); 3651 MatchingVD = ME->getMemberDecl(); 3652 } 3653 3654 // If we don't have information about the current list item, move on to 3655 // the next one. 3656 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD); 3657 if (InitAddrIt == CaptureDeviceAddrMap.end()) 3658 continue; 3659 3660 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 3661 // Initialize the temporary initialization variable with the address we 3662 // get from the runtime library. We have to cast the source address 3663 // because it is always a void *. References are materialized in the 3664 // privatization scope, so the initialization here disregards the fact 3665 // the original variable is a reference. 3666 QualType AddrQTy = 3667 getContext().getPointerType(OrigVD->getType().getNonReferenceType()); 3668 llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy); 3669 Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy); 3670 setAddrOfLocalVar(InitVD, InitAddr); 3671 3672 // Emit private declaration, it will be initialized by the value we 3673 // declaration we just added to the local declarations map. 3674 EmitDecl(*PvtVD); 3675 3676 // The initialization variables reached its purpose in the emission 3677 // ofthe previous declaration, so we don't need it anymore. 3678 LocalDeclMap.erase(InitVD); 3679 3680 // Return the address of the private variable. 3681 return GetAddrOfLocalVar(PvtVD); 3682 }); 3683 assert(IsRegistered && "firstprivate var already registered as private"); 3684 // Silence the warning about unused variable. 3685 (void)IsRegistered; 3686 3687 ++OrigVarIt; 3688 ++InitIt; 3689 } 3690 } 3691 3692 // Generate the instructions for '#pragma omp target data' directive. 3693 void CodeGenFunction::EmitOMPTargetDataDirective( 3694 const OMPTargetDataDirective &S) { 3695 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true); 3696 3697 // Create a pre/post action to signal the privatization of the device pointer. 3698 // This action can be replaced by the OpenMP runtime code generation to 3699 // deactivate privatization. 3700 bool PrivatizeDevicePointers = false; 3701 class DevicePointerPrivActionTy : public PrePostActionTy { 3702 bool &PrivatizeDevicePointers; 3703 3704 public: 3705 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers) 3706 : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {} 3707 void Enter(CodeGenFunction &CGF) override { 3708 PrivatizeDevicePointers = true; 3709 } 3710 }; 3711 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers); 3712 3713 auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers]( 3714 CodeGenFunction &CGF, PrePostActionTy &Action) { 3715 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3716 CGF.EmitStmt( 3717 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3718 }; 3719 3720 // Codegen that selects wheather to generate the privatization code or not. 3721 auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers, 3722 &InnermostCodeGen](CodeGenFunction &CGF, 3723 PrePostActionTy &Action) { 3724 RegionCodeGenTy RCG(InnermostCodeGen); 3725 PrivatizeDevicePointers = false; 3726 3727 // Call the pre-action to change the status of PrivatizeDevicePointers if 3728 // needed. 3729 Action.Enter(CGF); 3730 3731 if (PrivatizeDevicePointers) { 3732 OMPPrivateScope PrivateScope(CGF); 3733 // Emit all instances of the use_device_ptr clause. 3734 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) 3735 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope, 3736 Info.CaptureDeviceAddrMap); 3737 (void)PrivateScope.Privatize(); 3738 RCG(CGF); 3739 } else 3740 RCG(CGF); 3741 }; 3742 3743 // Forward the provided action to the privatization codegen. 3744 RegionCodeGenTy PrivRCG(PrivCodeGen); 3745 PrivRCG.setAction(Action); 3746 3747 // Notwithstanding the body of the region is emitted as inlined directive, 3748 // we don't use an inline scope as changes in the references inside the 3749 // region are expected to be visible outside, so we do not privative them. 3750 OMPLexicalScope Scope(CGF, S); 3751 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, 3752 PrivRCG); 3753 }; 3754 3755 RegionCodeGenTy RCG(CodeGen); 3756 3757 // If we don't have target devices, don't bother emitting the data mapping 3758 // code. 3759 if (CGM.getLangOpts().OMPTargetTriples.empty()) { 3760 RCG(*this); 3761 return; 3762 } 3763 3764 // Check if we have any if clause associated with the directive. 3765 const Expr *IfCond = nullptr; 3766 if (auto *C = S.getSingleClause<OMPIfClause>()) 3767 IfCond = C->getCondition(); 3768 3769 // Check if we have any device clause associated with the directive. 3770 const Expr *Device = nullptr; 3771 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 3772 Device = C->getDevice(); 3773 3774 // Set the action to signal privatization of device pointers. 3775 RCG.setAction(PrivAction); 3776 3777 // Emit region code. 3778 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG, 3779 Info); 3780 } 3781 3782 void CodeGenFunction::EmitOMPTargetEnterDataDirective( 3783 const OMPTargetEnterDataDirective &S) { 3784 // If we don't have target devices, don't bother emitting the data mapping 3785 // code. 3786 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3787 return; 3788 3789 // Check if we have any if clause associated with the directive. 3790 const Expr *IfCond = nullptr; 3791 if (auto *C = S.getSingleClause<OMPIfClause>()) 3792 IfCond = C->getCondition(); 3793 3794 // Check if we have any device clause associated with the directive. 3795 const Expr *Device = nullptr; 3796 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 3797 Device = C->getDevice(); 3798 3799 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 3800 } 3801 3802 void CodeGenFunction::EmitOMPTargetExitDataDirective( 3803 const OMPTargetExitDataDirective &S) { 3804 // If we don't have target devices, don't bother emitting the data mapping 3805 // code. 3806 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3807 return; 3808 3809 // Check if we have any if clause associated with the directive. 3810 const Expr *IfCond = nullptr; 3811 if (auto *C = S.getSingleClause<OMPIfClause>()) 3812 IfCond = C->getCondition(); 3813 3814 // Check if we have any device clause associated with the directive. 3815 const Expr *Device = nullptr; 3816 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 3817 Device = C->getDevice(); 3818 3819 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 3820 } 3821 3822 static void emitTargetParallelRegion(CodeGenFunction &CGF, 3823 const OMPTargetParallelDirective &S, 3824 PrePostActionTy &Action) { 3825 // Get the captured statement associated with the 'parallel' region. 3826 auto *CS = S.getCapturedStmt(OMPD_parallel); 3827 Action.Enter(CGF); 3828 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) { 3829 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 3830 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3831 CGF.EmitOMPPrivateClause(S, PrivateScope); 3832 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 3833 (void)PrivateScope.Privatize(); 3834 // TODO: Add support for clauses. 3835 CGF.EmitStmt(CS->getCapturedStmt()); 3836 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 3837 }; 3838 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen); 3839 emitPostUpdateForReductionClause( 3840 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 3841 } 3842 3843 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 3844 CodeGenModule &CGM, StringRef ParentName, 3845 const OMPTargetParallelDirective &S) { 3846 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3847 emitTargetParallelRegion(CGF, S, Action); 3848 }; 3849 llvm::Function *Fn; 3850 llvm::Constant *Addr; 3851 // Emit target region as a standalone region. 3852 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3853 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3854 assert(Fn && Addr && "Target device function emission failed."); 3855 } 3856 3857 void CodeGenFunction::EmitOMPTargetParallelDirective( 3858 const OMPTargetParallelDirective &S) { 3859 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3860 emitTargetParallelRegion(CGF, S, Action); 3861 }; 3862 emitCommonOMPTargetDirective(*this, S, CodeGen); 3863 } 3864 3865 void CodeGenFunction::EmitOMPTargetParallelForDirective( 3866 const OMPTargetParallelForDirective &S) { 3867 // TODO: codegen for target parallel for. 3868 } 3869 3870 /// Emit a helper variable and return corresponding lvalue. 3871 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, 3872 const ImplicitParamDecl *PVD, 3873 CodeGenFunction::OMPPrivateScope &Privates) { 3874 auto *VDecl = cast<VarDecl>(Helper->getDecl()); 3875 Privates.addPrivate( 3876 VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); }); 3877 } 3878 3879 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { 3880 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); 3881 // Emit outlined function for task construct. 3882 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 3883 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 3884 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 3885 const Expr *IfCond = nullptr; 3886 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3887 if (C->getNameModifier() == OMPD_unknown || 3888 C->getNameModifier() == OMPD_taskloop) { 3889 IfCond = C->getCondition(); 3890 break; 3891 } 3892 } 3893 3894 OMPTaskDataTy Data; 3895 // Check if taskloop must be emitted without taskgroup. 3896 Data.Nogroup = S.getSingleClause<OMPNogroupClause>(); 3897 // TODO: Check if we should emit tied or untied task. 3898 Data.Tied = true; 3899 // Set scheduling for taskloop 3900 if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) { 3901 // grainsize clause 3902 Data.Schedule.setInt(/*IntVal=*/false); 3903 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize())); 3904 } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) { 3905 // num_tasks clause 3906 Data.Schedule.setInt(/*IntVal=*/true); 3907 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks())); 3908 } 3909 3910 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { 3911 // if (PreCond) { 3912 // for (IV in 0..LastIteration) BODY; 3913 // <Final counter/linear vars updates>; 3914 // } 3915 // 3916 3917 // Emit: if (PreCond) - begin. 3918 // If the condition constant folds and can be elided, avoid emitting the 3919 // whole loop. 3920 bool CondConstant; 3921 llvm::BasicBlock *ContBlock = nullptr; 3922 OMPLoopScope PreInitScope(CGF, S); 3923 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 3924 if (!CondConstant) 3925 return; 3926 } else { 3927 auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); 3928 ContBlock = CGF.createBasicBlock("taskloop.if.end"); 3929 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 3930 CGF.getProfileCount(&S)); 3931 CGF.EmitBlock(ThenBlock); 3932 CGF.incrementProfileCounter(&S); 3933 } 3934 3935 if (isOpenMPSimdDirective(S.getDirectiveKind())) 3936 CGF.EmitOMPSimdInit(S); 3937 3938 OMPPrivateScope LoopScope(CGF); 3939 // Emit helper vars inits. 3940 enum { LowerBound = 5, UpperBound, Stride, LastIter }; 3941 auto *I = CS->getCapturedDecl()->param_begin(); 3942 auto *LBP = std::next(I, LowerBound); 3943 auto *UBP = std::next(I, UpperBound); 3944 auto *STP = std::next(I, Stride); 3945 auto *LIP = std::next(I, LastIter); 3946 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP, 3947 LoopScope); 3948 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP, 3949 LoopScope); 3950 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope); 3951 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, 3952 LoopScope); 3953 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 3954 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 3955 (void)LoopScope.Privatize(); 3956 // Emit the loop iteration variable. 3957 const Expr *IVExpr = S.getIterationVariable(); 3958 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 3959 CGF.EmitVarDecl(*IVDecl); 3960 CGF.EmitIgnoredExpr(S.getInit()); 3961 3962 // Emit the iterations count variable. 3963 // If it is not a variable, Sema decided to calculate iterations count on 3964 // each iteration (e.g., it is foldable into a constant). 3965 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 3966 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 3967 // Emit calculation of the iterations count. 3968 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 3969 } 3970 3971 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 3972 S.getInc(), 3973 [&S](CodeGenFunction &CGF) { 3974 CGF.EmitOMPLoopBody(S, JumpDest()); 3975 CGF.EmitStopPoint(&S); 3976 }, 3977 [](CodeGenFunction &) {}); 3978 // Emit: if (PreCond) - end. 3979 if (ContBlock) { 3980 CGF.EmitBranch(ContBlock); 3981 CGF.EmitBlock(ContBlock, true); 3982 } 3983 // Emit final copy of the lastprivate variables if IsLastIter != 0. 3984 if (HasLastprivateClause) { 3985 CGF.EmitOMPLastprivateClauseFinal( 3986 S, isOpenMPSimdDirective(S.getDirectiveKind()), 3987 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( 3988 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 3989 (*LIP)->getType(), S.getLocStart()))); 3990 } 3991 }; 3992 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 3993 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 3994 const OMPTaskDataTy &Data) { 3995 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) { 3996 OMPLoopScope PreInitScope(CGF, S); 3997 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S, 3998 OutlinedFn, SharedsTy, 3999 CapturedStruct, IfCond, Data); 4000 }; 4001 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, 4002 CodeGen); 4003 }; 4004 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 4005 } 4006 4007 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 4008 EmitOMPTaskLoopBasedDirective(S); 4009 } 4010 4011 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 4012 const OMPTaskLoopSimdDirective &S) { 4013 EmitOMPTaskLoopBasedDirective(S); 4014 } 4015 4016 // Generate the instructions for '#pragma omp target update' directive. 4017 void CodeGenFunction::EmitOMPTargetUpdateDirective( 4018 const OMPTargetUpdateDirective &S) { 4019 // If we don't have target devices, don't bother emitting the data mapping 4020 // code. 4021 if (CGM.getLangOpts().OMPTargetTriples.empty()) 4022 return; 4023 4024 // Check if we have any if clause associated with the directive. 4025 const Expr *IfCond = nullptr; 4026 if (auto *C = S.getSingleClause<OMPIfClause>()) 4027 IfCond = C->getCondition(); 4028 4029 // Check if we have any device clause associated with the directive. 4030 const Expr *Device = nullptr; 4031 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 4032 Device = C->getDevice(); 4033 4034 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 4035 } 4036