//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/DeclOpenMP.h"
#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
class OMPLexicalScope final : public CodeGenFunction::LexicalScope {
  /// Emit the declarations attached to the directive's clause pre-init
  /// statements: helper variables that must be materialized before the
  /// associated region is emitted.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              // Variables marked OMPCaptureNoInit get storage and cleanups
              // but deliberately no initializer emission.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  // Private scope used to remap captured variables when the directive is
  // emitted inline (AsInlined == true).
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD is already captured by the enclosing lambda,
  /// captured statement, or block, i.e. its DeclRefExpr must be marked as
  /// referring to an enclosing capture.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                  bool AsInlined = false)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    emitPreInitStmt(CGF, S);
    if (AsInlined) {
      if (S.hasAssociatedStmt()) {
        // When the region is emitted inline (no outlined function), remap
        // every variable captured by the associated CapturedStmt to the
        // address of the original variable in the enclosing context.
        auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
        for (auto &C : CS->captures()) {
          if (C.capturesVariable() || C.capturesVariableByCopy()) {
            auto *VD = C.getCapturedVar();
            DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                            isCapturedVar(CGF, VD) ||
                                (CGF.CapturedStmtInfo &&
                                 InlinedShareds.isGlobalVarCaptured(VD)),
                            VD->getType().getNonReferenceType(), VK_LValue,
                            SourceLocation());
            InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
              return CGF.EmitLValue(&DRE).getAddress();
            });
          }
        }
        (void)InlinedShareds.Privatize();
      }
    }
  }
};

/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expression from loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  /// Emit the loop directive's pre-init declarations (e.g. captured bounds
  /// expressions) before the loop itself is emitted.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    // NOTE(review): S is already an OMPLoopDirective, so this dyn_cast always
    // succeeds; it is harmless but redundant.
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
        for (const auto *I : PreInits->decls())
          CGF.EmitVarDecl(cast<VarDecl>(*I));
      }
    }
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

} // namespace

/// Compute the size of \p Ty in bytes as an llvm::Value, handling VLAs by
/// multiplying the (runtime) array sizes into the result.
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    // Peel off each VLA dimension, accumulating the runtime element counts.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    // If the element type is still zero-sized, the whole thing is zero-sized.
    // (If it is not, at least one VLA level was peeled, so Size is non-null.)
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}

/// Collect the values to pass to the outlined function for the captures of
/// \p S, in capture order: VLA sizes, 'this', by-copy values (widened to
/// uintptr through a temporary when not pointer-typed), and addresses for
/// by-reference captures.
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      // Pass the previously computed VLA size expression value directly.
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV =
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        // View the uintptr temporary through a pointer to the field's
        // original type so the store below uses the right representation.
        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), SourceLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

/// Reinterpret a uintptr-typed argument slot (see
/// GenerateOpenMPCapturedVars) as an address of \p DstType. If
/// \p isReferenceType, return instead the address of a fresh reference
/// temporary bound to that value.
static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), SourceLocation());
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references we need to return the address of the
  // reference instead of the reference of the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitScalarInit(RefVal, TmpLVal);
  }

  return TmpAddr;
}

/// Emit the outlined function for the captured statement \p S of an OpenMP
/// directive: build its parameter list from the capture record's fields,
/// emit the prologue that rebinds each parameter to the captured entity
/// (VLA sizes, by-copy values, by-reference addresses, 'this'), then emit
/// the captured body.
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  const RecordDecl *RD = S.getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  // Build the argument list.
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList Args;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType())
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &getContext().Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &getContext().Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getContext().getVariableArrayDecayedType(ArgType);
    Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
                                             FD->getLocation(), II, ArgType));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  // NOTE(review): ExtInfo is declared but never used below.
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F = llvm::Function::Create(
      FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
      CapturedStmtInfo->getHelperName(), &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // Generate the function.
  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      Address LocalAddr = GetAddrOfLocalVar(Args[Cnt]);
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CreateMemTemp(CurVD->getType(), getPointerAlign(),
                                        ".materialized_ref");
        EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, /*Volatile=*/false,
                          CurVD->getType());
        LocalAddr = RefAddr;
      }
      setAddrOfLocalVar(CurVD, LocalAddr);
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal =
        MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
                       AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      // Recover the VLA size from its uintptr argument slot and record it so
      // later VLA size queries inside the body find it.
      LValue CastedArgLVal =
          MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
                                              Args[Cnt]->getName(), ArgLVal),
                         FD->getType(), AlignmentSource::Decl);
      auto *ExprArg =
          EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizeMap[VAT->getSizeExpr()] = ExprArg;
    } else if (I->capturesVariable()) {
      // By-reference capture: the argument holds a reference to the variable;
      // dereference once unless the variable itself has reference type.
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        ArgAddr = EmitLoadOfReference(
            ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
      }
      setAddrOfLocalVar(
          Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(),
                                                  Args[Cnt]->getName(), ArgLVal,
                                                  VarTy->isReferenceType()));
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue =
          EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
    }
    ++Cnt;
    ++I;
  }

  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());

  return F;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
/// Copy an array element-by-element from \p SrcAddr to \p DestAddr using
/// \p CopyGen to emit the per-element copy; used for arrays whose elements
/// need non-trivial copying.
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  // PHIs walk both arrays in lock-step.
  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI =
      Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
396 static const OMPDeclareReductionDecl * 397 getReductionInit(const Expr *ReductionOp) { 398 if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) 399 if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 400 if (auto *DRE = 401 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 402 if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 403 return DRD; 404 return nullptr; 405 } 406 407 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 408 const OMPDeclareReductionDecl *DRD, 409 const Expr *InitOp, 410 Address Private, Address Original, 411 QualType Ty) { 412 if (DRD->getInitializer()) { 413 std::pair<llvm::Function *, llvm::Function *> Reduction = 414 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 415 auto *CE = cast<CallExpr>(InitOp); 416 auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 417 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 418 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 419 auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 420 auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 421 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 422 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 423 [=]() -> Address { return Private; }); 424 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 425 [=]() -> Address { return Original; }); 426 (void)PrivateScope.Privatize(); 427 RValue Func = RValue::get(Reduction.second); 428 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 429 CGF.EmitIgnoredExpr(InitOp); 430 } else { 431 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 432 auto *GV = new llvm::GlobalVariable( 433 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 434 llvm::GlobalValue::PrivateLinkage, Init, ".init"); 435 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 436 RValue InitRVal; 437 switch (CGF.getEvaluationKind(Ty)) { 438 case TEK_Scalar: 439 InitRVal = 
CGF.EmitLoadOfLValue(LV, SourceLocation()); 440 break; 441 case TEK_Complex: 442 InitRVal = 443 RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation())); 444 break; 445 case TEK_Aggregate: 446 InitRVal = RValue::getAggregate(LV.getAddress()); 447 break; 448 } 449 OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue); 450 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 451 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 452 /*IsInitializer=*/false); 453 } 454 } 455 456 /// \brief Emit initialization of arrays of complex types. 457 /// \param DestAddr Address of the array. 458 /// \param Type Type of array. 459 /// \param Init Initial expression of array. 460 /// \param SrcAddr Address of the original array. 461 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 462 QualType Type, const Expr *Init, 463 Address SrcAddr = Address::invalid()) { 464 auto *DRD = getReductionInit(Init); 465 // Perform element-by-element initialization. 466 QualType ElementTy; 467 468 // Drill down to the base element type on both arrays. 469 auto ArrayTy = Type->getAsArrayTypeUnsafe(); 470 auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 471 DestAddr = 472 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 473 if (DRD) 474 SrcAddr = 475 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 476 477 llvm::Value *SrcBegin = nullptr; 478 if (DRD) 479 SrcBegin = SrcAddr.getPointer(); 480 auto DestBegin = DestAddr.getPointer(); 481 // Cast from pointer to array type to pointer to single element. 482 auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 483 // The basic structure here is a while-do loop. 
484 auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 485 auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 486 auto IsEmpty = 487 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 488 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 489 490 // Enter the loop body, making that address the current address. 491 auto EntryBB = CGF.Builder.GetInsertBlock(); 492 CGF.EmitBlock(BodyBB); 493 494 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 495 496 llvm::PHINode *SrcElementPHI = nullptr; 497 Address SrcElementCurrent = Address::invalid(); 498 if (DRD) { 499 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 500 "omp.arraycpy.srcElementPast"); 501 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 502 SrcElementCurrent = 503 Address(SrcElementPHI, 504 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 505 } 506 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 507 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 508 DestElementPHI->addIncoming(DestBegin, EntryBB); 509 Address DestElementCurrent = 510 Address(DestElementPHI, 511 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 512 513 // Emit copy. 514 { 515 CodeGenFunction::RunCleanupsScope InitScope(CGF); 516 if (DRD && (DRD->getInitializer() || !Init)) { 517 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 518 SrcElementCurrent, ElementTy); 519 } else 520 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 521 /*IsInitializer=*/false); 522 } 523 524 if (DRD) { 525 // Shift the address forward by one element. 526 auto SrcElementNext = CGF.Builder.CreateConstGEP1_32( 527 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 528 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 529 } 530 531 // Shift the address forward by one element. 
532 auto DestElementNext = CGF.Builder.CreateConstGEP1_32( 533 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 534 // Check whether we've reached the end. 535 auto Done = 536 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 537 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 538 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 539 540 // Done. 541 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 542 } 543 544 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, 545 Address SrcAddr, const VarDecl *DestVD, 546 const VarDecl *SrcVD, const Expr *Copy) { 547 if (OriginalType->isArrayType()) { 548 auto *BO = dyn_cast<BinaryOperator>(Copy); 549 if (BO && BO->getOpcode() == BO_Assign) { 550 // Perform simple memcpy for simple copying. 551 EmitAggregateAssign(DestAddr, SrcAddr, OriginalType); 552 } else { 553 // For arrays with complex element types perform element by element 554 // copying. 555 EmitOMPAggregateAssign( 556 DestAddr, SrcAddr, OriginalType, 557 [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) { 558 // Working with the single array element, so have to remap 559 // destination and source variables to corresponding array 560 // elements. 561 CodeGenFunction::OMPPrivateScope Remap(*this); 562 Remap.addPrivate(DestVD, [DestElement]() -> Address { 563 return DestElement; 564 }); 565 Remap.addPrivate( 566 SrcVD, [SrcElement]() -> Address { return SrcElement; }); 567 (void)Remap.Privatize(); 568 EmitIgnoredExpr(Copy); 569 }); 570 } 571 } else { 572 // Remap pseudo source variable to private copy. 573 CodeGenFunction::OMPPrivateScope Remap(*this); 574 Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; }); 575 Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; }); 576 (void)Remap.Privatize(); 577 // Emit copying of the whole variable. 
578 EmitIgnoredExpr(Copy); 579 } 580 } 581 582 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, 583 OMPPrivateScope &PrivateScope) { 584 if (!HaveInsertPoint()) 585 return false; 586 bool FirstprivateIsLastprivate = false; 587 llvm::DenseSet<const VarDecl *> Lastprivates; 588 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 589 for (const auto *D : C->varlists()) 590 Lastprivates.insert( 591 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); 592 } 593 llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; 594 CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt())); 595 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { 596 auto IRef = C->varlist_begin(); 597 auto InitsRef = C->inits().begin(); 598 for (auto IInit : C->private_copies()) { 599 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 600 bool ThisFirstprivateIsLastprivate = 601 Lastprivates.count(OrigVD->getCanonicalDecl()) > 0; 602 auto *CapFD = CapturesInfo.lookup(OrigVD); 603 auto *FD = CapturedStmtInfo->lookup(OrigVD); 604 if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) && 605 !FD->getType()->isReferenceType()) { 606 EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); 607 ++IRef; 608 ++InitsRef; 609 continue; 610 } 611 FirstprivateIsLastprivate = 612 FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate; 613 if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { 614 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 615 auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); 616 bool IsRegistered; 617 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 618 /*RefersToEnclosingVariableOrCapture=*/FD != nullptr, 619 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 620 Address OriginalAddr = EmitLValue(&DRE).getAddress(); 621 QualType Type = VD->getType(); 622 if (Type->isArrayType()) { 623 // Emit VarDecl with copy init for 
arrays. 624 // Get the address of the original variable captured in current 625 // captured region. 626 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 627 auto Emission = EmitAutoVarAlloca(*VD); 628 auto *Init = VD->getInit(); 629 if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) { 630 // Perform simple memcpy. 631 EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr, 632 Type); 633 } else { 634 EmitOMPAggregateAssign( 635 Emission.getAllocatedAddress(), OriginalAddr, Type, 636 [this, VDInit, Init](Address DestElement, 637 Address SrcElement) { 638 // Clean up any temporaries needed by the initialization. 639 RunCleanupsScope InitScope(*this); 640 // Emit initialization for single element. 641 setAddrOfLocalVar(VDInit, SrcElement); 642 EmitAnyExprToMem(Init, DestElement, 643 Init->getType().getQualifiers(), 644 /*IsInitializer*/ false); 645 LocalDeclMap.erase(VDInit); 646 }); 647 } 648 EmitAutoVarCleanups(Emission); 649 return Emission.getAllocatedAddress(); 650 }); 651 } else { 652 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 653 // Emit private VarDecl with copy init. 654 // Remap temp VDInit variable to the address of the original 655 // variable 656 // (for proper handling of captured global variables). 657 setAddrOfLocalVar(VDInit, OriginalAddr); 658 EmitDecl(*VD); 659 LocalDeclMap.erase(VDInit); 660 return GetAddrOfLocalVar(VD); 661 }); 662 } 663 assert(IsRegistered && 664 "firstprivate var already registered as private"); 665 // Silence the warning about unused variable. 
666 (void)IsRegistered; 667 } 668 ++IRef; 669 ++InitsRef; 670 } 671 } 672 return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty(); 673 } 674 675 void CodeGenFunction::EmitOMPPrivateClause( 676 const OMPExecutableDirective &D, 677 CodeGenFunction::OMPPrivateScope &PrivateScope) { 678 if (!HaveInsertPoint()) 679 return; 680 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 681 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) { 682 auto IRef = C->varlist_begin(); 683 for (auto IInit : C->private_copies()) { 684 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 685 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 686 auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 687 bool IsRegistered = 688 PrivateScope.addPrivate(OrigVD, [&]() -> Address { 689 // Emit private VarDecl with copy init. 690 EmitDecl(*VD); 691 return GetAddrOfLocalVar(VD); 692 }); 693 assert(IsRegistered && "private var already registered as private"); 694 // Silence the warning about unused variable. 695 (void)IsRegistered; 696 } 697 ++IRef; 698 } 699 } 700 } 701 702 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { 703 if (!HaveInsertPoint()) 704 return false; 705 // threadprivate_var1 = master_threadprivate_var1; 706 // operator=(threadprivate_var2, master_threadprivate_var2); 707 // ... 708 // __kmpc_barrier(&loc, global_tid); 709 llvm::DenseSet<const VarDecl *> CopiedVars; 710 llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; 711 for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) { 712 auto IRef = C->varlist_begin(); 713 auto ISrcRef = C->source_exprs().begin(); 714 auto IDestRef = C->destination_exprs().begin(); 715 for (auto *AssignOp : C->assignment_ops()) { 716 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 717 QualType Type = VD->getType(); 718 if (CopiedVars.insert(VD->getCanonicalDecl()).second) { 719 // Get the address of the master variable. 
If we are emitting code with 720 // TLS support, the address is passed from the master as field in the 721 // captured declaration. 722 Address MasterAddr = Address::invalid(); 723 if (getLangOpts().OpenMPUseTLS && 724 getContext().getTargetInfo().isTLSSupported()) { 725 assert(CapturedStmtInfo->lookup(VD) && 726 "Copyin threadprivates should have been captured!"); 727 DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(), 728 VK_LValue, (*IRef)->getExprLoc()); 729 MasterAddr = EmitLValue(&DRE).getAddress(); 730 LocalDeclMap.erase(VD); 731 } else { 732 MasterAddr = 733 Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) 734 : CGM.GetAddrOfGlobal(VD), 735 getContext().getDeclAlign(VD)); 736 } 737 // Get the address of the threadprivate variable. 738 Address PrivateAddr = EmitLValue(*IRef).getAddress(); 739 if (CopiedVars.size() == 1) { 740 // At first check if current thread is a master thread. If it is, no 741 // need to copy data. 742 CopyBegin = createBasicBlock("copyin.not.master"); 743 CopyEnd = createBasicBlock("copyin.not.master.end"); 744 Builder.CreateCondBr( 745 Builder.CreateICmpNE( 746 Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy), 747 Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)), 748 CopyBegin, CopyEnd); 749 EmitBlock(CopyBegin); 750 } 751 auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 752 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 753 EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp); 754 } 755 ++IRef; 756 ++ISrcRef; 757 ++IDestRef; 758 } 759 } 760 if (CopyEnd) { 761 // Exit out of copying procedure for non-master thread. 
762 EmitBlock(CopyEnd, /*IsFinished=*/true); 763 return true; 764 } 765 return false; 766 } 767 768 bool CodeGenFunction::EmitOMPLastprivateClauseInit( 769 const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { 770 if (!HaveInsertPoint()) 771 return false; 772 bool HasAtLeastOneLastprivate = false; 773 llvm::DenseSet<const VarDecl *> SIMDLCVs; 774 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 775 auto *LoopDirective = cast<OMPLoopDirective>(&D); 776 for (auto *C : LoopDirective->counters()) { 777 SIMDLCVs.insert( 778 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 779 } 780 } 781 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; 782 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 783 HasAtLeastOneLastprivate = true; 784 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) 785 break; 786 auto IRef = C->varlist_begin(); 787 auto IDestRef = C->destination_exprs().begin(); 788 for (auto *IInit : C->private_copies()) { 789 // Keep the address of the original variable for future update at the end 790 // of the loop. 791 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 792 // Taskloops do not require additional initialization, it is done in 793 // runtime support library. 794 if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { 795 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 796 PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address { 797 DeclRefExpr DRE( 798 const_cast<VarDecl *>(OrigVD), 799 /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( 800 OrigVD) != nullptr, 801 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 802 return EmitLValue(&DRE).getAddress(); 803 }); 804 // Check if the variable is also a firstprivate: in this case IInit is 805 // not generated. Initialization of this variable will happen in codegen 806 // for 'firstprivate' clause. 
        // Check if the variable is also firstprivate/linear: in that case
        // IInit is null and the private copy was already emitted elsewhere.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

/// Emits the copy-back of lastprivate variables into the original variables.
/// When \p IsLastIterCond is non-null the stores are guarded by it (only the
/// thread that executed the last iteration copies back); when it is null the
/// copies are emitted unconditionally. When \p NoFinals is true the loop
/// counters are recorded as already emitted so their 'final' update
/// expressions are suppressed.
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  // Track each variable once across all lastprivate clauses so a variable
  // listed in several clauses is copied back only once.
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    // For loop directives, remember the 'final' update expression of each
    // loop counter (or mark it as done when NoFinals is set).
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        // A private copy of reference type holds a pointer; load it to reach
        // the referenced storage.
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Rebuilds the chain of indirections of the original base lvalue so that
/// \p Addr can be accessed through the same number of pointer/reference
/// loads as the original variable: for each pointer/reference level between
/// \p BaseTy and \p ElTy a temporary is allocated, each temporary storing
/// the address of the next one, with \p Addr stored into the innermost.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp; // Remember the outermost temporary; it is the result.
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLV.getPointer()->getType();
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  // No indirection levels were needed: return the casted address directly.
  return Address(Addr, BaseLV.getAlignment());
}

/// Dereferences the pointer/reference levels of \p BaseTy until \p ElTy is
/// reached, then returns an lvalue whose pointer is cast to the memory type
/// of \p ElTy. Used to get an addressable "begin" for array-section/element
/// reductions on (multi-level) pointer bases.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (auto *PtrTy = BaseTy->getAs<PointerType>())
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    else {
      BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
                                             BaseTy->castAs<ReferenceType>());
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      Address(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
          BaseLV.getAlignment()),
      BaseLV.getType(), BaseLV.getAlignmentSource());
}

/// Registers, in \p PrivateScope, the private copies required by every
/// 'reduction' clause of \p D. Three shapes of list item are handled:
/// OpenMP array sections, array subscripts, and plain variables (arrays or
/// scalars). In every case the LHS helper variable is mapped to the original
/// storage, the private copy is emitted with its reduction initializer, and
/// the RHS helper variable is mapped to the private copy.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    for (auto IRef : C->varlists()) {
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
      // DRD is non-null for user-defined ('declare reduction') reductions.
      auto *DRD = getReductionInit(*IRed);
      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
        // Case 1: reduction over an array section. Strip nested
        // sections/subscripts to find the base variable.
        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
          Base = TempOASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
        auto OASELValueUB =
            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue =
            loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
                        OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
          return OASELValueLB.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
                     OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit VarDecl with copy init for arrays.
              // Get the address of the original variable captured in current
              // captured region.
              // Section length = UB - LB + 1 elements; bind it to the VLA
              // size expression of the private copy's type.
              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
                                                 OASELValueLB.getPointer());
              Size = Builder.CreateNUWAdd(
                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(Size));
              EmitVariablyModifiedType(PrivateVD->getType());
              auto Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              auto *Init = PrivateVD->getInit();
              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                   DRD ? *IRed : Init,
                                   OASELValueLB.getAddress());
              EmitAutoVarCleanups(Emission);
              // Emit private VarDecl with reduction init.
              // Offset the private buffer so indexing with the original
              // base yields the section's first element, then rebuild the
              // base's level of indirection.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   OASELValueLB.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(),
                                OASELValueLB.getType(), OriginalBaseLValue,
                                Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
        // Case 2: reduction over a single array element.
        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto ASELValue = EmitLValue(ASE);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue = loadToBegin(
            *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
          return ASELValue.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
                     OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit private VarDecl with reduction init.
              AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                // User-defined reduction initializer.
                emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                 ASELValue.getAddress(),
                                                 ASELValue.getType());
              } else
                EmitAutoVarInit(Emission);
              EmitAutoVarCleanups(Emission);
              // Make the element address match the original indexing, then
              // restore the base's level of indirection.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   ASELValue.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
                                OriginalBaseLValue, Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
          return Builder.CreateElementBitCast(
              GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
              "rhs.begin");
        });
      } else {
        // Case 3: reduction over a whole variable (array or scalar).
        auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        if (getContext().getAsArrayType(Type)) {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          IRef->getType(), VK_LValue, IRef->getExprLoc());
          Address OriginalAddr = EmitLValue(&DRE).getAddress();
          PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
                                          LHSVD]() -> Address {
            OriginalAddr = Builder.CreateElementBitCast(
                OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
            return OriginalAddr;
          });
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            if (Type->isVariablyModifiedType()) {
              // Bind the VLA size expression before emitting the private
              // copy of a variably-modified array.
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(
                      getTypeSize(OrigVD->getType().getNonReferenceType())));
              EmitVariablyModifiedType(Type);
            }
            auto Emission = EmitAutoVarAlloca(*PrivateVD);
            auto Addr = Emission.getAllocatedAddress();
            auto *Init = PrivateVD->getInit();
            EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                 DRD ? *IRed : Init, OriginalAddr);
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
            return Builder.CreateElementBitCast(
                GetAddrOfLocalVar(PrivateVD),
                ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
          });
        } else {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          Address OriginalAddr = Address::invalid();
          PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
                                          &OriginalAddr]() -> Address {
            DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                            CapturedStmtInfo->lookup(OrigVD) != nullptr,
                            IRef->getType(), VK_LValue, IRef->getExprLoc());
            OriginalAddr = EmitLValue(&DRE).getAddress();
            return OriginalAddr;
          });
          // Emit reduction copy.
          bool IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
                // Emit private VarDecl with reduction init.
                AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
                auto Addr = Emission.getAllocatedAddress();
                if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                  // User-defined reduction initializer.
                  emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                   OriginalAddr,
                                                   PrivateVD->getType());
                } else
                  EmitAutoVarInit(Emission);
                EmitAutoVarCleanups(Emission);
                return Addr;
              });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
            return GetAddrOfLocalVar(PrivateVD);
          });
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
      ++IRed;
    }
  }
}

/// Emits the runtime reduction of all private reduction copies back into the
/// original variables by collecting the helper expressions of every
/// 'reduction' clause and handing them to the OpenMP runtime.
void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        D.getSingleClause<OMPNowaitClause>() ||
            isOpenMPParallelDirective(D.getDirectiveKind()) ||
            D.getDirectiveKind() == OMPD_simd,
        D.getDirectiveKind() == OMPD_simd);
  }
}

/// Emits the post-update expressions of all 'reduction' clauses of \p D.
/// If \p CondGen produces a non-null condition when the first post-update is
/// found, the updates are emitted inside a conditional block guarded by it.
static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (auto *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Shared lowering for 'parallel'-based directives: outlines the region into
/// a function, emits num_threads/proc_bind clause calls, extracts the
/// 'parallel'-related 'if' condition, and emits the runtime parallel call
/// with the captured variables.
static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                           const OMPExecutableDirective &S,
                                           OpenMPDirectiveKind InnermostKind,
                                           const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOrTeamsOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  // Find the 'if' clause that applies to 'parallel' (either unmodified or
  // with the 'parallel' name modifier).
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPLexicalScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // propagation master's thread values of threadprivate variables to local
      // instances of that variables of all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

/// Emits one iteration of the loop body of \p D: counter updates, linear
/// variable updates, then the body itself, with 'continue' jumping to the
/// per-iteration continue block.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *U : C->updates())
      EmitIgnoredExpr(U);
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

/// Emits the generic "while (LoopCond) { BodyGen; IncExpr; PostIncGen; }"
/// control-flow skeleton used by all OpenMP loop directives: a condition
/// block, an optional cleanup staging block, the body, and an increment
/// block with a back-edge to the condition.
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock, Builder.getCurrentDebugLocation());

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

/// Emits declarations and initializers for the private copies of 'linear'
/// clause variables, plus the pre-calculated linear step variables.
void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return;
  // Emit inits for the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        // Initializer refers to the original variable: re-emit the reference
        // with proper capture info so it resolves inside the outlined region.
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
}

/// Emits the final-value assignments of 'linear' clause variables (and their
/// post-update expressions). If \p CondGen produces a non-null condition,
/// the assignments are guarded by it.
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      // Privatize the original variable to its own address so the 'final'
      // expression writes directly to the original storage.
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emits llvm.assume alignment assumptions for every pointer listed in the
/// 'aligned' clauses of \p D, using the clause's alignment expression or the
/// target's default SIMD alignment when none is given.
static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined default
        // alignments for SIMD instructions on the target platforms are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

/// Registers private copies of the loop control variables of \p S in
/// \p LoopScope: each original counter is mapped to its private counter's
/// storage, and (when the original is addressable/captured/global) the
/// private counter itself is mapped to the original's address.
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    (void)LoopScope.addPrivate(VD, [&]() -> Address {
      // Emit var without initialization.
      if (!LocalDeclMap.count(PrivateVD)) {
        auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
        EmitAutoVarCleanups(VarEmission);
      }
      DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      (*I)->getType(), VK_LValue, (*I)->getExprLoc());
      return EmitLValue(&DRE).getAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    }
    ++I;
  }
}

/// Emits the "does the loop execute at least once" pre-condition check of a
/// loop directive, privatizing the loop counters and emitting their initial
/// values inside a temporary scope first.
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (auto I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Check that loop is executed at least one time.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

/// Registers private copies for all 'linear' clause variables of \p D.
/// Variables that are also SIMD loop control variables are only declared
/// (they are privatized as loop counters elsewhere).
void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else
        EmitVarDecl(*PrivateVD);
      ++CurPrivate;
    }
  }
}

/// Applies 'simdlen'/'safelen' clauses to the current loop metadata:
/// sets the vectorize width and, when a finite 'safelen' limits legality,
/// disables marking the memory instructions as parallel.
static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(false);
  }
}

/// Configures loop metadata for a simd directive: enables vectorization,
/// marks the loop parallel unless \p IsMonotonic, and applies
/// simdlen/safelen clauses.
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/lastprivate.
1548 LoopStack.setParallel(!IsMonotonic); 1549 LoopStack.setVectorizeEnable(true); 1550 emitSimdlenSafelenClause(*this, D, IsMonotonic); 1551 } 1552 1553 void CodeGenFunction::EmitOMPSimdFinal( 1554 const OMPLoopDirective &D, 1555 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1556 if (!HaveInsertPoint()) 1557 return; 1558 llvm::BasicBlock *DoneBB = nullptr; 1559 auto IC = D.counters().begin(); 1560 auto IPC = D.private_counters().begin(); 1561 for (auto F : D.finals()) { 1562 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); 1563 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl()); 1564 auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD); 1565 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) || 1566 OrigVD->hasGlobalStorage() || CED) { 1567 if (!DoneBB) { 1568 if (auto *Cond = CondGen(*this)) { 1569 // If the first post-update expression is found, emit conditional 1570 // block if it was requested. 1571 auto *ThenBB = createBasicBlock(".omp.final.then"); 1572 DoneBB = createBasicBlock(".omp.final.done"); 1573 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1574 EmitBlock(ThenBB); 1575 } 1576 } 1577 Address OrigAddr = Address::invalid(); 1578 if (CED) 1579 OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); 1580 else { 1581 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), 1582 /*RefersToEnclosingVariableOrCapture=*/false, 1583 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); 1584 OrigAddr = EmitLValue(&DRE).getAddress(); 1585 } 1586 OMPPrivateScope VarScope(*this); 1587 VarScope.addPrivate(OrigVD, 1588 [OrigAddr]() -> Address { return OrigAddr; }); 1589 (void)VarScope.Privatize(); 1590 EmitIgnoredExpr(F); 1591 } 1592 ++IC; 1593 ++IPC; 1594 } 1595 if (DoneBB) 1596 EmitBlock(DoneBB, /*IsFinished=*/true); 1597 } 1598 1599 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 1600 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1601 
OMPLoopScope PreInitScope(CGF, S); 1602 // if (PreCond) { 1603 // for (IV in 0..LastIteration) BODY; 1604 // <Final counter/linear vars updates>; 1605 // } 1606 // 1607 1608 // Emit: if (PreCond) - begin. 1609 // If the condition constant folds and can be elided, avoid emitting the 1610 // whole loop. 1611 bool CondConstant; 1612 llvm::BasicBlock *ContBlock = nullptr; 1613 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1614 if (!CondConstant) 1615 return; 1616 } else { 1617 auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); 1618 ContBlock = CGF.createBasicBlock("simd.if.end"); 1619 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 1620 CGF.getProfileCount(&S)); 1621 CGF.EmitBlock(ThenBlock); 1622 CGF.incrementProfileCounter(&S); 1623 } 1624 1625 // Emit the loop iteration variable. 1626 const Expr *IVExpr = S.getIterationVariable(); 1627 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 1628 CGF.EmitVarDecl(*IVDecl); 1629 CGF.EmitIgnoredExpr(S.getInit()); 1630 1631 // Emit the iterations count variable. 1632 // If it is not a variable, Sema decided to calculate iterations count on 1633 // each iteration (e.g., it is foldable into a constant). 1634 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1635 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1636 // Emit calculation of the iterations count. 
1637 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 1638 } 1639 1640 CGF.EmitOMPSimdInit(S); 1641 1642 emitAlignedClause(CGF, S); 1643 CGF.EmitOMPLinearClauseInit(S); 1644 { 1645 OMPPrivateScope LoopScope(CGF); 1646 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 1647 CGF.EmitOMPLinearClause(S, LoopScope); 1648 CGF.EmitOMPPrivateClause(S, LoopScope); 1649 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1650 bool HasLastprivateClause = 1651 CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1652 (void)LoopScope.Privatize(); 1653 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1654 S.getInc(), 1655 [&S](CodeGenFunction &CGF) { 1656 CGF.EmitOMPLoopBody(S, JumpDest()); 1657 CGF.EmitStopPoint(&S); 1658 }, 1659 [](CodeGenFunction &) {}); 1660 CGF.EmitOMPSimdFinal( 1661 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1662 // Emit final copy of the lastprivate variables at the end of loops. 1663 if (HasLastprivateClause) 1664 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 1665 CGF.EmitOMPReductionClauseFinal(S); 1666 emitPostUpdateForReductionClause( 1667 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1668 } 1669 CGF.EmitOMPLinearClauseFinal( 1670 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1671 // Emit: if (PreCond) - end. 
  // Close the "if (PreCond)" guard opened above: fall through into the
  // continuation block when the precondition check was emitted dynamically.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

/// \brief Emit the outer dispatch loop shared by worksharing and distribute
/// directives.
///
/// The inner loop runs one chunk [LB..UB]; this routine wraps it in a loop
/// that either re-requests chunks from the runtime (dynamic/guided/ordered
/// schedules, via __kmpc_dispatch_next) or advances LB/UB by the stride
/// (static chunked schedules).
///
/// \param DynamicOrOrdered True for dynamic schedules or when the 'ordered'
///        clause is present; selects runtime-driven chunk dispatch.
/// \param IsMonotonic True when iterations must be executed in monotonically
///        increasing order; disables parallel-access loop metadata.
/// \param LB, UB, ST, IL Addresses of the lower-bound, upper-bound, stride
///        and is-last-iteration helper variables.
void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock, Builder.getCurrentDebugLocation());

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // Static chunked schedule: bounds were advanced by the increment code
    // below, so only clamp and re-test here.
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond());
  } else {
    // Dynamic/ordered schedule: ask the runtime for the next chunk; the
    // returned flag is false once no chunks remain.
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL,
                                 LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();
  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                   [&S, LoopExit](CodeGenFunction &CGF) {
                     CGF.EmitOMPLoopBody(S, LoopExit);
                     CGF.EmitStopPoint(&S);
                   },
                   [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
                     // For 'ordered' loops, signal per-iteration completion to
                     // the runtime after each inner-loop iteration.
                     if (Ordered) {
                       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
                           CGF, Loc, IVSize, IVSigned);
                     }
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  if (!DynamicOrOrdered)
    RT.emitForStaticFinish(*this, S.getLocEnd());
}
// For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  // Initialize the dispatch machinery: dynamic/ordered schedules use the
  // dispatch-init entry point (global upper bound passed by value); static
  // chunked schedules use the static-init entry point (bounds written through
  // the helper-variable addresses).
  if (DynamicOrOrdered) {
    llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
    RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
                           IVSigned, Ordered, UBVal, Chunk);
  } else {
    RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                         Ordered, IL, LB, UB, ST, Chunk);
  }

  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB,
                   ST, IL, Chunk);
}

/// \brief Emit the outer loop for a 'distribute' directive.
void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind,
    const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {

  auto &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as a OMPForOuterLoop, except that schedule cannot be
  // dynamic
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  // 'distribute' always uses a static schedule; chunks are advanced by the
  // shared outer-loop emitter below.
  RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                              IVSize, IVSigned, /* Ordered = */ false,
                              IL, LB, UB, ST, Chunk);

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false,
                   S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk);
}

/// \brief Emit 'distribute parallel for' as an inlined region; currently only
/// the captured statement body is emitted (no worksharing lowering yet).
void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_distribute_parallel_for,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

/// \brief Emit 'distribute parallel for simd'; same placeholder strategy as
/// EmitOMPDistributeParallelForDirective.
void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_distribute_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

/// \brief Emit 'distribute simd'; same placeholder strategy as above.
void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_distribute_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

/// \brief Emit 'target parallel for simd'; same placeholder strategy as above.
void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

/// \brief Emit 'target simd'; same placeholder strategy as above.
void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        OMPLoopScope PreInitScope(CGF, S);
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

/// \brief Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

namespace {
/// Bundles a schedule kind with its two optional modifiers
/// (e.g. monotonic/nonmonotonic, simd).
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace

/// \brief Emit the body of a worksharing loop ('for', 'for simd', and the
/// loop part of combined directives). Returns true if the directive carries a
/// 'lastprivate' clause (the caller may need a barrier in that case).
bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // An 'ordered' clause with a parameter requests doacross loop support;
    // without a parameter it requests classic ordered-region semantics.
    bool Ordered = false;
    if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S);
      else
        Ordered = true;
    }

    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.
    LValue LB =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    LValue UB =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          // Normalize the chunk value to the iteration variable's type.
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      if (RT.isStaticNonchunked(ScheduleKind.Schedule,
                                /* Chunked */ Chunk != nullptr) &&
          !Ordered) {
        // Static non-chunked, unordered: no outer dispatch loop needed — each
        // thread executes exactly one chunk.
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk is
        // distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
                             IVSize, IVSigned, Ordered,
                             IL.getAddress(), LB.getAddress(),
                             UB.getAddress(), ST.getAddress());
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        const bool IsMonotonic =
            Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
            ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
            ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LB.getAddress(), UB.getAddress(), ST.getAddress(),
                            IL.getAddress(), Chunk);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S,
                         [&](CodeGenFunction &CGF) -> llvm::Value * {
                           return CGF.Builder.CreateIsNotNull(
                               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
                         });
      }
      EmitOMPReductionClauseFinal(S);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getLocStart()));
    });
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
  return HasLastprivateClause;
}

/// \brief Emit an 'omp for' directive as an inlined worksharing region.
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }

  // Emit an implicit barrier at the end.
  // Barrier is required even under 'nowait' when lastprivates must be made
  // visible before threads proceed.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

/// \brief Emit an 'omp for simd' directive; shares the worksharing-loop
/// emitter with 'omp for'.
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

/// \brief Create a temporary alloca for a sections helper variable and return
/// it as an lvalue, optionally storing \p Init into it.
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitScalarInit(Init, LVal);
  return LVal;
}

/// \brief Emit code for 'sections' and 'parallel sections' regions by lowering
/// them to a static worksharing loop over section indices plus a switch.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  // CS is null when the region body is a single statement rather than a
  // compound of section statements.
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Global upper bound is the last section index (or 0 for a single
    // statement).
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Build synthetic AST nodes (on the stack) for the loop condition and
    // increment so the generic inner-loop emitter can be reused.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(),
                        /*fpContractable=*/false);
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single statement: emit it as the sole (index 0) section.
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32,
        /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
        UB.getAddress(), ST.getAddress());
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart());
    CGF.EmitOMPReductionClauseFinal(S);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause.
  // Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}

/// \brief Emit an 'omp sections' directive, followed by its implicit barrier
/// unless 'nowait' is present.
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_sections);
  }
}

/// \brief Emit an 'omp section' directive as an inlined region.
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
                                              S.hasCancel());
}

/// \brief Emit an 'omp single' directive, including 'copyprivate' handling and
/// the trailing implicit barrier when required.
void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination>=<source> expressions)
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for 'single' region along with 'copyprivate' clauses
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid data race on firstprivate
  // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getLocStart(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
}

/// \brief Emit an 'omp master' directive.
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
}

/// \brief Emit an 'omp critical' directive, passing along the optional 'hint'
/// clause value.
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  Expr *Hint = nullptr;
  if (auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getLocStart(), Hint);
}

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPWorksharingLoop(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPWorksharingLoop(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitSections(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
}

/// \brief Shared setup for task-based directives ('task', 'taskloop'...):
/// collects clause data into \p Data and prepares the outlined task function.
/// NOTE(review): body continues beyond this chunk; comments here cover only
/// the visible clause-collection prologue.
void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
                                                const RegionCodeGenTy &BodyGen,
                                                const TaskGenTy &TaskGen,
                                                OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  auto *TaskT = std::next(I, 4);
  // Check if the task is final
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
2448 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 2449 auto *Prio = Clause->getPriority(); 2450 Data.Priority.setInt(/*IntVal=*/true); 2451 Data.Priority.setPointer(EmitScalarConversion( 2452 EmitScalarExpr(Prio), Prio->getType(), 2453 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 2454 Prio->getExprLoc())); 2455 } 2456 // The first function argument for tasks is a thread id, the second one is a 2457 // part id (0 for tied tasks, >=0 for untied task). 2458 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 2459 // Get list of private variables. 2460 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 2461 auto IRef = C->varlist_begin(); 2462 for (auto *IInit : C->private_copies()) { 2463 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2464 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2465 Data.PrivateVars.push_back(*IRef); 2466 Data.PrivateCopies.push_back(IInit); 2467 } 2468 ++IRef; 2469 } 2470 } 2471 EmittedAsPrivate.clear(); 2472 // Get list of firstprivate variables. 2473 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 2474 auto IRef = C->varlist_begin(); 2475 auto IElemInitRef = C->inits().begin(); 2476 for (auto *IInit : C->private_copies()) { 2477 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2478 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2479 Data.FirstprivateVars.push_back(*IRef); 2480 Data.FirstprivateCopies.push_back(IInit); 2481 Data.FirstprivateInits.push_back(*IElemInitRef); 2482 } 2483 ++IRef; 2484 ++IElemInitRef; 2485 } 2486 } 2487 // Get list of lastprivate variables (for taskloops). 
2488 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 2489 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 2490 auto IRef = C->varlist_begin(); 2491 auto ID = C->destination_exprs().begin(); 2492 for (auto *IInit : C->private_copies()) { 2493 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2494 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2495 Data.LastprivateVars.push_back(*IRef); 2496 Data.LastprivateCopies.push_back(IInit); 2497 } 2498 LastprivateDstsOrigs.insert( 2499 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 2500 cast<DeclRefExpr>(*IRef)}); 2501 ++IRef; 2502 ++ID; 2503 } 2504 } 2505 // Build list of dependences. 2506 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) 2507 for (auto *IRef : C->varlists()) 2508 Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); 2509 auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen, &LastprivateDstsOrigs]( 2510 CodeGenFunction &CGF, PrePostActionTy &Action) { 2511 // Set proper addresses for generated private copies. 2512 OMPPrivateScope Scope(CGF); 2513 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || 2514 !Data.LastprivateVars.empty()) { 2515 auto *CopyFn = CGF.Builder.CreateLoad( 2516 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); 2517 auto *PrivatesPtr = CGF.Builder.CreateLoad( 2518 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); 2519 // Map privates. 
2520 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 2521 llvm::SmallVector<llvm::Value *, 16> CallArgs; 2522 CallArgs.push_back(PrivatesPtr); 2523 for (auto *E : Data.PrivateVars) { 2524 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2525 Address PrivatePtr = CGF.CreateMemTemp( 2526 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); 2527 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2528 CallArgs.push_back(PrivatePtr.getPointer()); 2529 } 2530 for (auto *E : Data.FirstprivateVars) { 2531 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2532 Address PrivatePtr = 2533 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 2534 ".firstpriv.ptr.addr"); 2535 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2536 CallArgs.push_back(PrivatePtr.getPointer()); 2537 } 2538 for (auto *E : Data.LastprivateVars) { 2539 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2540 Address PrivatePtr = 2541 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 2542 ".lastpriv.ptr.addr"); 2543 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2544 CallArgs.push_back(PrivatePtr.getPointer()); 2545 } 2546 CGF.EmitRuntimeCall(CopyFn, CallArgs); 2547 for (auto &&Pair : LastprivateDstsOrigs) { 2548 auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); 2549 DeclRefExpr DRE( 2550 const_cast<VarDecl *>(OrigVD), 2551 /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup( 2552 OrigVD) != nullptr, 2553 Pair.second->getType(), VK_LValue, Pair.second->getExprLoc()); 2554 Scope.addPrivate(Pair.first, [&CGF, &DRE]() { 2555 return CGF.EmitLValue(&DRE).getAddress(); 2556 }); 2557 } 2558 for (auto &&Pair : PrivatePtrs) { 2559 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 2560 CGF.getContext().getDeclAlign(Pair.first)); 2561 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 2562 } 2563 } 2564 (void)Scope.Privatize(); 2565 2566 Action.Enter(CGF); 2567 
BodyGen(CGF); 2568 }; 2569 auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 2570 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, 2571 Data.NumberOfParts); 2572 OMPLexicalScope Scope(*this, S); 2573 TaskGen(*this, OutlinedFn, Data); 2574 } 2575 2576 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 2577 // Emit outlined function for task construct. 2578 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2579 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 2580 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 2581 const Expr *IfCond = nullptr; 2582 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2583 if (C->getNameModifier() == OMPD_unknown || 2584 C->getNameModifier() == OMPD_task) { 2585 IfCond = C->getCondition(); 2586 break; 2587 } 2588 } 2589 2590 OMPTaskDataTy Data; 2591 // Check if we should emit tied or untied task. 2592 Data.Tied = !S.getSingleClause<OMPUntiedClause>(); 2593 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 2594 CGF.EmitStmt(CS->getCapturedStmt()); 2595 }; 2596 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 2597 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 2598 const OMPTaskDataTy &Data) { 2599 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn, 2600 SharedsTy, CapturedStruct, IfCond, 2601 Data); 2602 }; 2603 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 2604 } 2605 2606 void CodeGenFunction::EmitOMPTaskyieldDirective( 2607 const OMPTaskyieldDirective &S) { 2608 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); 2609 } 2610 2611 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 2612 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); 2613 } 2614 2615 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 2616 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart()); 2617 } 2618 
void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  // Emit the captured body inline; the runtime brackets it with taskgroup
  // begin/end calls.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  // Forward the (possibly empty) flush variable list to the runtime.
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          // Normalize the chunk expression to the IV's type.
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        // Static non-chunked: each team gets exactly one chunk, so a single
        // static init/inner-loop/finish sequence suffices.
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                                    IVSize, IVSigned, /* Ordered = */ false,
                                    IL.getAddress(), LB.getAddress(),
                                    UB.getAddress(), ST.getAddress());
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope,
                                   LB.getAddress(), UB.getAddress(),
                                   ST.getAddress(), IL.getAddress(), Chunk);
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}

void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S);
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
                                              false);
}

/// Outline the captured statement of an 'ordered simd' region into a
/// standalone, non-inlinable function.
static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
  // Keep the region out-of-line so it is not vectorized with the simd loop.
  Fn->addFnAttr(llvm::Attribute::NoInline);
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  // A stand-alone 'ordered depend(...)' has no associated statement; it only
  // emits doacross ordering calls.
  if (!S.getAssociatedStmt()) {
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    if (C) {
      // 'ordered simd': call an outlined copy of the body.
      auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
      CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
}

/// Convert \p Val (scalar or complex) to a scalar of type \p DestType.
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType
                                         DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
                                        Loc)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType, Loc);
}

/// Convert \p Val (scalar or complex) to a complex value of type \p DestType.
static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                              DestElementType, Loc);
    // Imaginary part is zero for a real-to-complex conversion.
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

/// Store \p RVal into \p LVal; global-register lvalues cannot be stored
/// atomically, all others go through an atomic store with the requested order.
static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  } else {
    CGF.EmitAtomicStore(RVal, LVal,
                        IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                 : llvm::AtomicOrdering::Monotonic,
                        LVal.isVolatile(), /*IsInit=*/false);
  }
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  // Convert RVal to the evaluation kind of the destination and store it.
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  // Global-register lvalues cannot be loaded atomically.
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(
                         XLValue, Loc,
                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                  : llvm::AtomicOrdering::Monotonic,
                         XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
}

static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

/// Try to lower an atomic update to an 'atomicrmw' instruction; returns
/// {false, null} when the operation/type is not directly supported and the
/// caller must fall back to a compare-and-swap loop.
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
  // expression is simple and atomic is allowed for the given type for the
  // target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  // Map the AST binary operator to the corresponding atomicrmw operation.
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    // 'x = expr - x' is not a subtraction of x, so atomicrmw cannot help.
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    // 'x = x < e ? x : e' is min; 'x = e < x ? e : x' is max (and vice versa
    // for BO_GT below), with the unsigned variants for unsigned types.
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    // No atomicrmw equivalent; fall back to compare-and-swap.
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    // Widen/truncate constant operands to the storage type of 'x'.
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  // The opaque placeholders for 'x' and 'expr' inside the update expression.
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ?
                                      RHS : LHS;
  // Bind the opaque placeholders to the already-evaluated values and emit the
  // update expression under them.
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
        return CGF.EmitAnyExpr(UE);
      };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

/// Convert \p Value to an rvalue of \p ResType (scalar or complex only).
static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; - > expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      // 'v' captures the old value for postfix forms, the new one otherwise.
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

/// Dispatch 'omp atomic' emission on the clause kind; an absent clause
/// (OMPC_unknown) defaults to 'update'.
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find first clause (skip seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
    enterFullExpression(EWC);
  }
  // Processing for statements under 'atomic capture'.
3243 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 3244 for (const auto *C : Compound->body()) { 3245 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) { 3246 enterFullExpression(EWC); 3247 } 3248 } 3249 } 3250 3251 auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, 3252 PrePostActionTy &) { 3253 CGF.EmitStopPoint(CS); 3254 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), 3255 S.getV(), S.getExpr(), S.getUpdateExpr(), 3256 S.isXLHSInRHSPart(), S.getLocStart()); 3257 }; 3258 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3259 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); 3260 } 3261 3262 std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/> 3263 CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction( 3264 CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName, 3265 bool IsOffloadEntry) { 3266 llvm::Function *OutlinedFn = nullptr; 3267 llvm::Constant *OutlinedFnID = nullptr; 3268 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3269 OMPPrivateScope PrivateScope(CGF); 3270 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3271 CGF.EmitOMPPrivateClause(S, PrivateScope); 3272 (void)PrivateScope.Privatize(); 3273 3274 Action.Enter(CGF); 3275 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3276 }; 3277 // Emit target region as a standalone region. 
3278 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3279 S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); 3280 return std::make_pair(OutlinedFn, OutlinedFnID); 3281 } 3282 3283 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 3284 const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); 3285 3286 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3287 GenerateOpenMPCapturedVars(CS, CapturedVars); 3288 3289 llvm::Function *Fn = nullptr; 3290 llvm::Constant *FnID = nullptr; 3291 3292 // Check if we have any if clause associated with the directive. 3293 const Expr *IfCond = nullptr; 3294 3295 if (auto *C = S.getSingleClause<OMPIfClause>()) { 3296 IfCond = C->getCondition(); 3297 } 3298 3299 // Check if we have any device clause associated with the directive. 3300 const Expr *Device = nullptr; 3301 if (auto *C = S.getSingleClause<OMPDeviceClause>()) { 3302 Device = C->getDevice(); 3303 } 3304 3305 // Check if we have an if clause whose conditional always evaluates to false 3306 // or if we do not have any targets specified. If so the target region is not 3307 // an offload entry point. 3308 bool IsOffloadEntry = true; 3309 if (IfCond) { 3310 bool Val; 3311 if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) 3312 IsOffloadEntry = false; 3313 } 3314 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3315 IsOffloadEntry = false; 3316 3317 assert(CurFuncDecl && "No parent declaration for target region!"); 3318 StringRef ParentName; 3319 // In case we have Ctors/Dtors we use the complete type variant to produce 3320 // the mangling of the device outlined kernel. 
3321 if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl)) 3322 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); 3323 else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl)) 3324 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); 3325 else 3326 ParentName = 3327 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl))); 3328 3329 std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction( 3330 CGM, S, ParentName, IsOffloadEntry); 3331 OMPLexicalScope Scope(*this, S); 3332 CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device, 3333 CapturedVars); 3334 } 3335 3336 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, 3337 const OMPExecutableDirective &S, 3338 OpenMPDirectiveKind InnermostKind, 3339 const RegionCodeGenTy &CodeGen) { 3340 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 3341 auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). 3342 emitParallelOrTeamsOutlinedFunction(S, 3343 *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 3344 3345 const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S); 3346 const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>(); 3347 const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>(); 3348 if (NT || TL) { 3349 Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; 3350 Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; 3351 3352 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, 3353 S.getLocStart()); 3354 } 3355 3356 OMPLexicalScope Scope(CGF, S); 3357 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3358 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3359 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, 3360 CapturedVars); 3361 } 3362 3363 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { 3364 // Emit parallel region as a standalone region. 
3365 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3366 OMPPrivateScope PrivateScope(CGF); 3367 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3368 CGF.EmitOMPPrivateClause(S, PrivateScope); 3369 (void)PrivateScope.Privatize(); 3370 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3371 }; 3372 emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); 3373 } 3374 3375 void CodeGenFunction::EmitOMPCancellationPointDirective( 3376 const OMPCancellationPointDirective &S) { 3377 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), 3378 S.getCancelRegion()); 3379 } 3380 3381 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 3382 const Expr *IfCond = nullptr; 3383 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3384 if (C->getNameModifier() == OMPD_unknown || 3385 C->getNameModifier() == OMPD_cancel) { 3386 IfCond = C->getCondition(); 3387 break; 3388 } 3389 } 3390 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, 3391 S.getCancelRegion()); 3392 } 3393 3394 CodeGenFunction::JumpDest 3395 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 3396 if (Kind == OMPD_parallel || Kind == OMPD_task) 3397 return ReturnBlock; 3398 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 3399 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for); 3400 return BreakContinueStack.back().BreakBlock; 3401 } 3402 3403 void CodeGenFunction::EmitOMPUseDevicePtrClause( 3404 const OMPClause &NC, OMPPrivateScope &PrivateScope, 3405 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { 3406 const auto &C = cast<OMPUseDevicePtrClause>(NC); 3407 auto OrigVarIt = C.varlist_begin(); 3408 auto InitIt = C.inits().begin(); 3409 for (auto PvtVarIt : C.private_copies()) { 3410 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl()); 3411 auto *InitVD = 
cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl()); 3412 auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl()); 3413 3414 // In order to identify the right initializer we need to match the 3415 // declaration used by the mapping logic. In some cases we may get 3416 // OMPCapturedExprDecl that refers to the original declaration. 3417 const ValueDecl *MatchingVD = OrigVD; 3418 if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) { 3419 // OMPCapturedExprDecl are used to privative fields of the current 3420 // structure. 3421 auto *ME = cast<MemberExpr>(OED->getInit()); 3422 assert(isa<CXXThisExpr>(ME->getBase()) && 3423 "Base should be the current struct!"); 3424 MatchingVD = ME->getMemberDecl(); 3425 } 3426 3427 // If we don't have information about the current list item, move on to 3428 // the next one. 3429 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD); 3430 if (InitAddrIt == CaptureDeviceAddrMap.end()) 3431 continue; 3432 3433 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 3434 // Initialize the temporary initialization variable with the address we 3435 // get from the runtime library. We have to cast the source address 3436 // because it is always a void *. References are materialized in the 3437 // privatization scope, so the initialization here disregards the fact 3438 // the original variable is a reference. 3439 QualType AddrQTy = 3440 getContext().getPointerType(OrigVD->getType().getNonReferenceType()); 3441 llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy); 3442 Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy); 3443 setAddrOfLocalVar(InitVD, InitAddr); 3444 3445 // Emit private declaration, it will be initialized by the value we 3446 // declaration we just added to the local declarations map. 3447 EmitDecl(*PvtVD); 3448 3449 // The initialization variables reached its purpose in the emission 3450 // ofthe previous declaration, so we don't need it anymore. 
3451 LocalDeclMap.erase(InitVD); 3452 3453 // Return the address of the private variable. 3454 return GetAddrOfLocalVar(PvtVD); 3455 }); 3456 assert(IsRegistered && "firstprivate var already registered as private"); 3457 // Silence the warning about unused variable. 3458 (void)IsRegistered; 3459 3460 ++OrigVarIt; 3461 ++InitIt; 3462 } 3463 } 3464 3465 // Generate the instructions for '#pragma omp target data' directive. 3466 void CodeGenFunction::EmitOMPTargetDataDirective( 3467 const OMPTargetDataDirective &S) { 3468 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true); 3469 3470 // Create a pre/post action to signal the privatization of the device pointer. 3471 // This action can be replaced by the OpenMP runtime code generation to 3472 // deactivate privatization. 3473 bool PrivatizeDevicePointers = false; 3474 class DevicePointerPrivActionTy : public PrePostActionTy { 3475 bool &PrivatizeDevicePointers; 3476 3477 public: 3478 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers) 3479 : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {} 3480 void Enter(CodeGenFunction &CGF) override { 3481 PrivatizeDevicePointers = true; 3482 } 3483 }; 3484 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers); 3485 3486 auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers]( 3487 CodeGenFunction &CGF, PrePostActionTy &Action) { 3488 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3489 CGF.EmitStmt( 3490 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3491 }; 3492 3493 // Codegen that selects wheather to generate the privatization code or not. 3494 auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers, 3495 &InnermostCodeGen](CodeGenFunction &CGF, 3496 PrePostActionTy &Action) { 3497 RegionCodeGenTy RCG(InnermostCodeGen); 3498 PrivatizeDevicePointers = false; 3499 3500 // Call the pre-action to change the status of PrivatizeDevicePointers if 3501 // needed. 
3502 Action.Enter(CGF); 3503 3504 if (PrivatizeDevicePointers) { 3505 OMPPrivateScope PrivateScope(CGF); 3506 // Emit all instances of the use_device_ptr clause. 3507 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) 3508 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope, 3509 Info.CaptureDeviceAddrMap); 3510 (void)PrivateScope.Privatize(); 3511 RCG(CGF); 3512 } else 3513 RCG(CGF); 3514 }; 3515 3516 // Forward the provided action to the privatization codegen. 3517 RegionCodeGenTy PrivRCG(PrivCodeGen); 3518 PrivRCG.setAction(Action); 3519 3520 // Notwithstanding the body of the region is emitted as inlined directive, 3521 // we don't use an inline scope as changes in the references inside the 3522 // region are expected to be visible outside, so we do not privative them. 3523 OMPLexicalScope Scope(CGF, S); 3524 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, 3525 PrivRCG); 3526 }; 3527 3528 RegionCodeGenTy RCG(CodeGen); 3529 3530 // If we don't have target devices, don't bother emitting the data mapping 3531 // code. 3532 if (CGM.getLangOpts().OMPTargetTriples.empty()) { 3533 RCG(*this); 3534 return; 3535 } 3536 3537 // Check if we have any if clause associated with the directive. 3538 const Expr *IfCond = nullptr; 3539 if (auto *C = S.getSingleClause<OMPIfClause>()) 3540 IfCond = C->getCondition(); 3541 3542 // Check if we have any device clause associated with the directive. 3543 const Expr *Device = nullptr; 3544 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 3545 Device = C->getDevice(); 3546 3547 // Set the action to signal privatization of device pointers. 3548 RCG.setAction(PrivAction); 3549 3550 // Emit region code. 3551 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG, 3552 Info); 3553 } 3554 3555 void CodeGenFunction::EmitOMPTargetEnterDataDirective( 3556 const OMPTargetEnterDataDirective &S) { 3557 // If we don't have target devices, don't bother emitting the data mapping 3558 // code. 
3559 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3560 return; 3561 3562 // Check if we have any if clause associated with the directive. 3563 const Expr *IfCond = nullptr; 3564 if (auto *C = S.getSingleClause<OMPIfClause>()) 3565 IfCond = C->getCondition(); 3566 3567 // Check if we have any device clause associated with the directive. 3568 const Expr *Device = nullptr; 3569 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 3570 Device = C->getDevice(); 3571 3572 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 3573 } 3574 3575 void CodeGenFunction::EmitOMPTargetExitDataDirective( 3576 const OMPTargetExitDataDirective &S) { 3577 // If we don't have target devices, don't bother emitting the data mapping 3578 // code. 3579 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3580 return; 3581 3582 // Check if we have any if clause associated with the directive. 3583 const Expr *IfCond = nullptr; 3584 if (auto *C = S.getSingleClause<OMPIfClause>()) 3585 IfCond = C->getCondition(); 3586 3587 // Check if we have any device clause associated with the directive. 3588 const Expr *Device = nullptr; 3589 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 3590 Device = C->getDevice(); 3591 3592 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 3593 } 3594 3595 void CodeGenFunction::EmitOMPTargetParallelDirective( 3596 const OMPTargetParallelDirective &S) { 3597 // TODO: codegen for target parallel. 3598 } 3599 3600 void CodeGenFunction::EmitOMPTargetParallelForDirective( 3601 const OMPTargetParallelForDirective &S) { 3602 // TODO: codegen for target parallel for. 3603 } 3604 3605 /// Emit a helper variable and return corresponding lvalue. 
3606 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, 3607 const ImplicitParamDecl *PVD, 3608 CodeGenFunction::OMPPrivateScope &Privates) { 3609 auto *VDecl = cast<VarDecl>(Helper->getDecl()); 3610 Privates.addPrivate( 3611 VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); }); 3612 } 3613 3614 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { 3615 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); 3616 // Emit outlined function for task construct. 3617 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 3618 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 3619 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 3620 const Expr *IfCond = nullptr; 3621 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3622 if (C->getNameModifier() == OMPD_unknown || 3623 C->getNameModifier() == OMPD_taskloop) { 3624 IfCond = C->getCondition(); 3625 break; 3626 } 3627 } 3628 3629 OMPTaskDataTy Data; 3630 // Check if taskloop must be emitted without taskgroup. 3631 Data.Nogroup = S.getSingleClause<OMPNogroupClause>(); 3632 // TODO: Check if we should emit tied or untied task. 3633 Data.Tied = true; 3634 // Set scheduling for taskloop 3635 if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) { 3636 // grainsize clause 3637 Data.Schedule.setInt(/*IntVal=*/false); 3638 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize())); 3639 } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) { 3640 // num_tasks clause 3641 Data.Schedule.setInt(/*IntVal=*/true); 3642 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks())); 3643 } 3644 3645 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { 3646 // if (PreCond) { 3647 // for (IV in 0..LastIteration) BODY; 3648 // <Final counter/linear vars updates>; 3649 // } 3650 // 3651 3652 // Emit: if (PreCond) - begin. 
3653 // If the condition constant folds and can be elided, avoid emitting the 3654 // whole loop. 3655 bool CondConstant; 3656 llvm::BasicBlock *ContBlock = nullptr; 3657 OMPLoopScope PreInitScope(CGF, S); 3658 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 3659 if (!CondConstant) 3660 return; 3661 } else { 3662 auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); 3663 ContBlock = CGF.createBasicBlock("taskloop.if.end"); 3664 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 3665 CGF.getProfileCount(&S)); 3666 CGF.EmitBlock(ThenBlock); 3667 CGF.incrementProfileCounter(&S); 3668 } 3669 3670 if (isOpenMPSimdDirective(S.getDirectiveKind())) 3671 CGF.EmitOMPSimdInit(S); 3672 3673 OMPPrivateScope LoopScope(CGF); 3674 // Emit helper vars inits. 3675 enum { LowerBound = 5, UpperBound, Stride, LastIter }; 3676 auto *I = CS->getCapturedDecl()->param_begin(); 3677 auto *LBP = std::next(I, LowerBound); 3678 auto *UBP = std::next(I, UpperBound); 3679 auto *STP = std::next(I, Stride); 3680 auto *LIP = std::next(I, LastIter); 3681 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP, 3682 LoopScope); 3683 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP, 3684 LoopScope); 3685 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope); 3686 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, 3687 LoopScope); 3688 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 3689 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 3690 (void)LoopScope.Privatize(); 3691 // Emit the loop iteration variable. 3692 const Expr *IVExpr = S.getIterationVariable(); 3693 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 3694 CGF.EmitVarDecl(*IVDecl); 3695 CGF.EmitIgnoredExpr(S.getInit()); 3696 3697 // Emit the iterations count variable. 
3698 // If it is not a variable, Sema decided to calculate iterations count on 3699 // each iteration (e.g., it is foldable into a constant). 3700 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 3701 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 3702 // Emit calculation of the iterations count. 3703 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 3704 } 3705 3706 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 3707 S.getInc(), 3708 [&S](CodeGenFunction &CGF) { 3709 CGF.EmitOMPLoopBody(S, JumpDest()); 3710 CGF.EmitStopPoint(&S); 3711 }, 3712 [](CodeGenFunction &) {}); 3713 // Emit: if (PreCond) - end. 3714 if (ContBlock) { 3715 CGF.EmitBranch(ContBlock); 3716 CGF.EmitBlock(ContBlock, true); 3717 } 3718 // Emit final copy of the lastprivate variables if IsLastIter != 0. 3719 if (HasLastprivateClause) { 3720 CGF.EmitOMPLastprivateClauseFinal( 3721 S, isOpenMPSimdDirective(S.getDirectiveKind()), 3722 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( 3723 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 3724 (*LIP)->getType(), S.getLocStart()))); 3725 } 3726 }; 3727 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 3728 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 3729 const OMPTaskDataTy &Data) { 3730 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) { 3731 OMPLoopScope PreInitScope(CGF, S); 3732 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S, 3733 OutlinedFn, SharedsTy, 3734 CapturedStruct, IfCond, Data); 3735 }; 3736 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, 3737 CodeGen); 3738 }; 3739 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 3740 } 3741 3742 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 3743 EmitOMPTaskLoopBasedDirective(S); 3744 } 3745 3746 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 3747 const OMPTaskLoopSimdDirective &S) { 3748 EmitOMPTaskLoopBasedDirective(S); 3749 } 3750 3751 // Generate the 
instructions for '#pragma omp target update' directive. 3752 void CodeGenFunction::EmitOMPTargetUpdateDirective( 3753 const OMPTargetUpdateDirective &S) { 3754 // If we don't have target devices, don't bother emitting the data mapping 3755 // code. 3756 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3757 return; 3758 3759 // Check if we have any if clause associated with the directive. 3760 const Expr *IfCond = nullptr; 3761 if (auto *C = S.getSingleClause<OMPIfClause>()) 3762 IfCond = C->getCondition(); 3763 3764 // Check if we have any device clause associated with the directive. 3765 const Expr *Device = nullptr; 3766 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 3767 Device = C->getDevice(); 3768 3769 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 3770 } 3771