//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs, which handles correct
/// codegen for captured expressions.
class OMPLexicalScope final : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                  bool AsInlined = false)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    emitPreInitStmt(CGF, S);
    if (AsInlined) {
      if (S.hasAssociatedStmt()) {
        auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
        for (auto &C : CS->captures()) {
          if (C.capturesVariable() || C.capturesVariableByCopy()) {
            auto *VD = C.getCapturedVar();
            DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                            isCapturedVar(CGF, VD) ||
                                (CGF.CapturedStmtInfo &&
                                 InlinedShareds.isGlobalVarCaptured(VD)),
                            VD->getType().getNonReferenceType(), VK_LValue,
                            SourceLocation());
            InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
              return CGF.EmitLValue(&DRE).getAddress();
            });
          }
        }
        (void)InlinedShareds.Privatize();
      }
    }
  }
};

/// Private scope for OpenMP loop-based directives, which supports capturing
/// of used expressions from the loop statement.
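/// In particular, the loop directive's pre-init declarations (for example,
/// captured computations of the loop bounds for a collapsed loop nest) are
/// emitted by this scope before the loop itself.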
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    if (auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

} // namespace

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV =
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();

      // If the field is not a pointer, we need to save the actual value
      // and load it back as a uintptr.
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), SourceLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
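        // The net effect is that, e.g., a captured 'float' is stored into a
        // uintptr-sized temporary through a pointer of the source type and
        // reloaded as a uintptr, bitwise-reinterpreting it into a
        // pointer-sized value the runtime can forward.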
        CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), SourceLocation());
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references, we need to return the address of the
  // reference instead of the address of the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitScalarInit(RefVal, TmpLVal);
  }

  return TmpAddr;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  const RecordDecl *RD = S.getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  // Build the argument list.
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList Args;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary because the runtime library can only deal
    // with pointers. VLA type sizes are passed to the outlined function in the
    // same way.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType())
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &getContext().Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &getContext().Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getContext().getVariableArrayDecayedType(ArgType);
    Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
                                             FD->getLocation(), II, ArgType));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
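  // The outlined function returns 'void'; all data flows through the
  // argument list assembled above.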
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F = llvm::Function::Create(
      FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
      CapturedStmtInfo->getHelperName(), &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // Generate the function.
  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    // If we are capturing a pointer by copy we don't need to do anything; just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      Address LocalAddr = GetAddrOfLocalVar(Args[Cnt]);
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CreateMemTemp(CurVD->getType(), getPointerAlign(),
                                        ".materialized_ref");
        EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, /*Volatile=*/false,
                          CurVD->getType());
        LocalAddr = RefAddr;
      }
      setAddrOfLocalVar(CurVD, LocalAddr);
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal =
        MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
                       AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      LValue CastedArgLVal =
          MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
                                              Args[Cnt]->getName(), ArgLVal),
                         FD->getType(), AlignmentSource::Decl);
      auto *ExprArg =
          EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizeMap[VAT->getSizeExpr()] = ExprArg;
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        ArgAddr = EmitLoadOfReference(
            ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
      }
      setAddrOfLocalVar(
          Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(),
                                                  Args[Cnt]->getName(), ArgLVal,
                                                  VarTy->isReferenceType()));
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue =
          EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
    }
    ++Cnt;
    ++I;
  }

  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());

  return F;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for the reduction.
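/// The reduction op for a UDR has the shape 'opaque-callee(&lhs, &rhs)', so
/// we look through the call's opaque callee to the underlying
/// declare-reduction declaration.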
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    auto *CE = cast<CallExpr>(InitOp);
    auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() -> Address { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() -> Address { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, ".init");
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// \brief Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, const Expr *Init,
                                 Address SrcAddr = Address::invalid()) {
  auto *DRD = getReductionInit(Init);
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
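  // (SrcAddr is only needed when a user-defined reduction combines the
  // private element with the original one.)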
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (DRD && (DRD->getInitializer() || !Init)) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform a simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types, perform element-by-element
      // copying.
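      // e.g. arrays whose element type has a user-defined copy assignment
      // operator, which must be invoked once per element.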
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // We are working with a single array element, so we have to remap
            // the destination and source variables to the corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *E : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *CapFD = CapturesInfo.lookup(OrigVD);
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform a simple memcpy.
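              // (A trivial initializer means the byte image of the original
              // array is a valid initial value for the private copy.)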
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap the temp VDInit variable to the address of the original
            // variable (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code
        // with TLS support, the address is passed from the master as a field
        // in the captured declaration.
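        // Without TLS support the master copy is simply the global (or
        // static local) variable itself.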
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If
          // it is, there is no need to copy the data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of the copying procedure for the non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    // Taskloops do not require additional initialization; it is done in the
    // runtime support library.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the
      // end of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
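        // SIMD loop control variables are also skipped here; their private
        // copies are created as part of the loop emission itself.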
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
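        // For a reference-typed private the copy must go through the
        // referenced storage, so load the pointer first.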
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLV.getPointer()->getType();
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLV.getAlignment());
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (auto *PtrTy = BaseTy->getAs<PointerType>())
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    else {
      BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
                                             BaseTy->castAs<ReferenceType>());
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      Address(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
          BaseLV.getAlignment()),
      BaseLV.getType(), BaseLV.getAlignmentSource());
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    for (auto IRef : C->varlists()) {
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
      auto *DRD = getReductionInit(*IRed);
      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
          Base = TempOASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
        auto OASELValueUB =
            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue =
            loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
                        OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
          return OASELValueLB.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
                     OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit VarDecl with copy init for arrays.
              // Get the address of the original variable captured in the
              // current captured region.
              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
                                                 OASELValueLB.getPointer());
              Size = Builder.CreateNUWAdd(
                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(Size));
              EmitVariablyModifiedType(PrivateVD->getType());
              auto Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              auto *Init = PrivateVD->getInit();
              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                   DRD ? *IRed : Init,
                                   OASELValueLB.getAddress());
              EmitAutoVarCleanups(Emission);
              // Emit private VarDecl with reduction init.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   OASELValueLB.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(),
                                OASELValueLB.getType(), OriginalBaseLValue,
                                Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto ASELValue = EmitLValue(ASE);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue = loadToBegin(
            *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
          return ASELValue.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
                     OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit private VarDecl with reduction init.
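              // If the UDR supplies an initializer (or the private decl has
              // no default init at all), it takes precedence over the
              // declared initializer below.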
              AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                 ASELValue.getAddress(),
                                                 ASELValue.getType());
              } else
                EmitAutoVarInit(Emission);
              EmitAutoVarCleanups(Emission);
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   ASELValue.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
                                OriginalBaseLValue, Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
          return Builder.CreateElementBitCast(
              GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
              "rhs.begin");
        });
      } else {
        auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        if (getContext().getAsArrayType(Type)) {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          IRef->getType(), VK_LValue, IRef->getExprLoc());
          Address OriginalAddr = EmitLValue(&DRE).getAddress();
          PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
                                          LHSVD]() -> Address {
            OriginalAddr = Builder.CreateElementBitCast(
                OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
            return OriginalAddr;
          });
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            if (Type->isVariablyModifiedType()) {
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(
                      getTypeSize(OrigVD->getType().getNonReferenceType())));
              EmitVariablyModifiedType(Type);
            }
            auto Emission = EmitAutoVarAlloca(*PrivateVD);
            auto Addr = Emission.getAllocatedAddress();
            auto *Init = PrivateVD->getInit();
            EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                 DRD ? *IRed : Init, OriginalAddr);
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
            return Builder.CreateElementBitCast(
                GetAddrOfLocalVar(PrivateVD),
                ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
          });
        } else {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          Address OriginalAddr = Address::invalid();
          PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
                                          &OriginalAddr]() -> Address {
            DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                            CapturedStmtInfo->lookup(OrigVD) != nullptr,
                            IRef->getType(), VK_LValue, IRef->getExprLoc());
            OriginalAddr = EmitLValue(&DRE).getAddress();
            return OriginalAddr;
          });
          // Emit reduction copy.
          bool IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
                // Emit private VarDecl with reduction init.
                AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
                auto Addr = Emission.getAllocatedAddress();
                if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                  emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                   OriginalAddr,
                                                   PrivateVD->getType());
                } else
                  EmitAutoVarInit(Emission);
                EmitAutoVarCleanups(Emission);
                return Addr;
              });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
            return GetAddrOfLocalVar(PrivateVD);
          });
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
      ++IRed;
    }
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    // Emit nowait reduction if the nowait clause is present or the directive
    // is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        D.getSingleClause<OMPNowaitClause>() ||
            isOpenMPParallelDirective(D.getDirectiveKind()) ||
            D.getDirectiveKind() == OMPD_simd,
        D.getDirectiveKind() == OMPD_simd);
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (auto *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // When the first post-update expression is found, emit the
          // conditional block if it was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                           const OMPExecutableDirective &S,
                                           OpenMPDirectiveKind InnermostKind,
                                           const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOrTeamsOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPLexicalScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize the threads and avoid data
      // races when propagating the master thread's values of the
      // threadprivate variables to the local instances of those variables in
      // the other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values for the current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *U : C->updates())
      EmitIgnoredExpr(U);
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
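  // 'continue' statements in the body branch here; the increment itself is
  // emitted by the enclosing loop construct (see EmitOMPInnerLoop).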
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock, Builder.getCurrentDebugLocation());

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return;
  // Emit inits for the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
}

void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
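  // After the last iteration each original linear variable holds,
  // conceptually, 'init + num_iterations * step'; the 'finals()' expressions
  // compute exactly these values.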
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // When the first post-update expression is found, emit the
          // conditional block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    (void)LoopScope.addPrivate(VD, [&]() -> Address {
      // Emit the private counter without initialization.
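      // (It may already have been emitted, e.g. when the counter also
      // appears in a linear or lastprivate clause.)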
1444       if (!LocalDeclMap.count(PrivateVD)) {
1445         auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
1446         EmitAutoVarCleanups(VarEmission);
1447       }
1448       DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1449                       /*RefersToEnclosingVariableOrCapture=*/false,
1450                       (*I)->getType(), VK_LValue, (*I)->getExprLoc());
1451       return EmitLValue(&DRE).getAddress();
1452     });
1453     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
1454         VD->hasGlobalStorage()) {
1455       (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1456         DeclRefExpr DRE(const_cast<VarDecl *>(VD),
1457                         LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
1458                         E->getType(), VK_LValue, E->getExprLoc());
1459         return EmitLValue(&DRE).getAddress();
1460       });
1461     }
1462     ++I;
1463   }
1464 }
1465
1466 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1467                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1468                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1469   if (!CGF.HaveInsertPoint())
1470     return;
1471   {
1472     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1473     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1474     (void)PreCondScope.Privatize();
1475     // Get initial values of real counters.
1476     for (auto I : S.inits()) {
1477       CGF.EmitIgnoredExpr(I);
1478     }
1479   }
1480   // Check that the loop is executed at least once.
1481   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1482 }
1483
1484 void CodeGenFunction::EmitOMPLinearClause(
1485     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1486   if (!HaveInsertPoint())
1487     return;
1488   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1489   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1490     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1491     for (auto *C : LoopDirective->counters()) {
1492       SIMDLCVs.insert(
1493           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1494     }
1495   }
1496   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1497     auto CurPrivate = C->privates().begin();
1498     for (auto *E : C->varlists()) {
1499       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1500       auto *PrivateVD =
1501           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1502       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1503         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1504           // Emit private VarDecl with copy init.
1505           EmitVarDecl(*PrivateVD);
1506           return GetAddrOfLocalVar(PrivateVD);
1507         });
1508         assert(IsRegistered && "linear var already registered as private");
1509         // Silence the warning about unused variable.
1510         (void)IsRegistered;
1511       } else
1512         EmitVarDecl(*PrivateVD);
1513       ++CurPrivate;
1514     }
1515   }
1516 }
1517
1518 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1519                                      const OMPExecutableDirective &D,
1520                                      bool IsMonotonic) {
1521   if (!CGF.HaveInsertPoint())
1522     return;
1523   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1524     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1525                                  /*ignoreResult=*/true);
1526     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1527     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1528     // In the presence of a finite 'safelen', it may be unsafe to mark all
1529     // the memory instructions parallel, because loop-carried
1530     // dependences of 'safelen' iterations are possible.
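    // For example (illustrative): with '#pragma omp simd simdlen(8) safelen(4)'
    // the width hint comes from 'simdlen', but iterations farther than 4 apart
    // may still depend on each other, so because 'safelen' is present the loop
    // is not marked fully parallel.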
1531     if (!IsMonotonic)
1532       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1533   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1534     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1535                                  /*ignoreResult=*/true);
1536     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1537     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1538     // In the presence of a finite 'safelen', it may be unsafe to mark all
1539     // the memory instructions parallel, because loop-carried
1540     // dependences of 'safelen' iterations are possible.
1541     CGF.LoopStack.setParallel(false);
1542   }
1543 }
1544
1545 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1546                                       bool IsMonotonic) {
1547   // Walk clauses and process simdlen/safelen.
1548   LoopStack.setParallel(!IsMonotonic);
1549   LoopStack.setVectorizeEnable(true);
1550   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1551 }
1552
1553 void CodeGenFunction::EmitOMPSimdFinal(
1554     const OMPLoopDirective &D,
1555     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1556   if (!HaveInsertPoint())
1557     return;
1558   llvm::BasicBlock *DoneBB = nullptr;
1559   auto IC = D.counters().begin();
1560   auto IPC = D.private_counters().begin();
1561   for (auto F : D.finals()) {
1562     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1563     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1564     auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1565     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1566         OrigVD->hasGlobalStorage() || CED) {
1567       if (!DoneBB) {
1568         if (auto *Cond = CondGen(*this)) {
1569           // If the first post-update expression is found, emit conditional
1570           // block if it was requested.
1571           auto *ThenBB = createBasicBlock(".omp.final.then");
1572           DoneBB = createBasicBlock(".omp.final.done");
1573           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1574           EmitBlock(ThenBB);
1575         }
1576       }
1577       Address OrigAddr = Address::invalid();
1578       if (CED)
1579         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1580       else {
1581         DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1582                         /*RefersToEnclosingVariableOrCapture=*/false,
1583                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1584         OrigAddr = EmitLValue(&DRE).getAddress();
1585       }
1586       OMPPrivateScope VarScope(*this);
1587       VarScope.addPrivate(OrigVD,
1588                           [OrigAddr]() -> Address { return OrigAddr; });
1589       (void)VarScope.Privatize();
1590       EmitIgnoredExpr(F);
1591     }
1592     ++IC;
1593     ++IPC;
1594   }
1595   if (DoneBB)
1596     EmitBlock(DoneBB, /*IsFinished=*/true);
1597 }
1598
1599 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1600   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1601     OMPLoopScope PreInitScope(CGF, S);
1602     // if (PreCond) {
1603     //   for (IV in 0..LastIteration) BODY;
1604     //   <Final counter/linear vars updates>;
1605     // }
1606     //
1607
1608     // Emit: if (PreCond) - begin.
1609     // If the condition constant folds and can be elided, avoid emitting the
1610     // whole loop.
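    // For example (illustrative): in
    //   #pragma omp simd
    //   for (int i = 0; i < 0; ++i) ...
    // the precondition folds to 'false' and no loop is emitted at all.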
1611 bool CondConstant; 1612 llvm::BasicBlock *ContBlock = nullptr; 1613 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1614 if (!CondConstant) 1615 return; 1616 } else { 1617 auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); 1618 ContBlock = CGF.createBasicBlock("simd.if.end"); 1619 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 1620 CGF.getProfileCount(&S)); 1621 CGF.EmitBlock(ThenBlock); 1622 CGF.incrementProfileCounter(&S); 1623 } 1624 1625 // Emit the loop iteration variable. 1626 const Expr *IVExpr = S.getIterationVariable(); 1627 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 1628 CGF.EmitVarDecl(*IVDecl); 1629 CGF.EmitIgnoredExpr(S.getInit()); 1630 1631 // Emit the iterations count variable. 1632 // If it is not a variable, Sema decided to calculate iterations count on 1633 // each iteration (e.g., it is foldable into a constant). 1634 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1635 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1636 // Emit calculation of the iterations count. 1637 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 1638 } 1639 1640 CGF.EmitOMPSimdInit(S); 1641 1642 emitAlignedClause(CGF, S); 1643 CGF.EmitOMPLinearClauseInit(S); 1644 { 1645 OMPPrivateScope LoopScope(CGF); 1646 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 1647 CGF.EmitOMPLinearClause(S, LoopScope); 1648 CGF.EmitOMPPrivateClause(S, LoopScope); 1649 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1650 bool HasLastprivateClause = 1651 CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1652 (void)LoopScope.Privatize(); 1653 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1654 S.getInc(), 1655 [&S](CodeGenFunction &CGF) { 1656 CGF.EmitOMPLoopBody(S, JumpDest()); 1657 CGF.EmitStopPoint(&S); 1658 }, 1659 [](CodeGenFunction &) {}); 1660 CGF.EmitOMPSimdFinal( 1661 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1662 // Emit final copy of the lastprivate variables at the end of loops. 1663 if (HasLastprivateClause) 1664 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 1665 CGF.EmitOMPReductionClauseFinal(S); 1666 emitPostUpdateForReductionClause( 1667 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1668 } 1669 CGF.EmitOMPLinearClauseFinal( 1670 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1671 // Emit: if (PreCond) - end. 1672 if (ContBlock) { 1673 CGF.EmitBranch(ContBlock); 1674 CGF.EmitBlock(ContBlock, true); 1675 } 1676 }; 1677 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1678 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1679 } 1680 1681 void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, 1682 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1683 Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { 1684 auto &RT = CGM.getOpenMPRuntime(); 1685 1686 const Expr *IVExpr = S.getIterationVariable(); 1687 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1688 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1689 1690 auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 1691 1692 // Start the loop with a block that tests the condition. 
1693 auto CondBlock = createBasicBlock("omp.dispatch.cond"); 1694 EmitBlock(CondBlock); 1695 LoopStack.push(CondBlock, Builder.getCurrentDebugLocation()); 1696 1697 llvm::Value *BoolCondVal = nullptr; 1698 if (!DynamicOrOrdered) { 1699 // UB = min(UB, GlobalUB) 1700 EmitIgnoredExpr(S.getEnsureUpperBound()); 1701 // IV = LB 1702 EmitIgnoredExpr(S.getInit()); 1703 // IV < UB 1704 BoolCondVal = EvaluateExprAsBool(S.getCond()); 1705 } else { 1706 BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL, 1707 LB, UB, ST); 1708 } 1709 1710 // If there are any cleanups between here and the loop-exit scope, 1711 // create a block to stage a loop exit along. 1712 auto ExitBlock = LoopExit.getBlock(); 1713 if (LoopScope.requiresCleanups()) 1714 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 1715 1716 auto LoopBody = createBasicBlock("omp.dispatch.body"); 1717 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 1718 if (ExitBlock != LoopExit.getBlock()) { 1719 EmitBlock(ExitBlock); 1720 EmitBranchThroughCleanup(LoopExit); 1721 } 1722 EmitBlock(LoopBody); 1723 1724 // Emit "IV = LB" (in case of static schedule, we have already calculated new 1725 // LB for loop condition and emitted it above). 1726 if (DynamicOrOrdered) 1727 EmitIgnoredExpr(S.getInit()); 1728 1729 // Create a block for the increment. 1730 auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 1731 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1732 1733 // Generate !llvm.loop.parallel metadata for loads and stores for loops 1734 // with dynamic/guided scheduling and without ordered clause. 1735 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 1736 LoopStack.setParallel(!IsMonotonic); 1737 else 1738 EmitOMPSimdInit(S, IsMonotonic); 1739 1740 SourceLocation Loc = S.getLocStart(); 1741 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), 1742 [&S, LoopExit](CodeGenFunction &CGF) { 1743 CGF.EmitOMPLoopBody(S, LoopExit); 1744 CGF.EmitStopPoint(&S); 1745 }, 1746 [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) { 1747 if (Ordered) { 1748 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd( 1749 CGF, Loc, IVSize, IVSigned); 1750 } 1751 }); 1752 1753 EmitBlock(Continue.getBlock()); 1754 BreakContinueStack.pop_back(); 1755 if (!DynamicOrOrdered) { 1756 // Emit "LB = LB + Stride", "UB = UB + Stride". 1757 EmitIgnoredExpr(S.getNextLowerBound()); 1758 EmitIgnoredExpr(S.getNextUpperBound()); 1759 } 1760 1761 EmitBranch(CondBlock); 1762 LoopStack.pop(); 1763 // Emit the fall-through block. 1764 EmitBlock(LoopExit.getBlock()); 1765 1766 // Tell the runtime we are done. 1767 if (!DynamicOrOrdered) 1768 RT.emitForStaticFinish(*this, S.getLocEnd()); 1769 1770 } 1771 1772 void CodeGenFunction::EmitOMPForOuterLoop( 1773 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 1774 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1775 Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { 1776 auto &RT = CGM.getOpenMPRuntime(); 1777 1778 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 1779 const bool DynamicOrOrdered = 1780 Ordered || RT.isDynamic(ScheduleKind.Schedule); 1781 1782 assert((Ordered || 1783 !RT.isStaticNonchunked(ScheduleKind.Schedule, 1784 /*Chunked=*/Chunk != nullptr)) && 1785 "static non-chunked schedule does not need outer loop"); 1786 1787 // Emit outer loop. 
1788   //
1789   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1790   // When schedule(dynamic,chunk_size) is specified, the iterations are
1791   // distributed to threads in the team in chunks as the threads request them.
1792   // Each thread executes a chunk of iterations, then requests another chunk,
1793   // until no chunks remain to be distributed. Each chunk contains chunk_size
1794   // iterations, except for the last chunk to be distributed, which may have
1795   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1796   //
1797   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1798   // to threads in the team in chunks as the executing threads request them.
1799   // Each thread executes a chunk of iterations, then requests another chunk,
1800   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1801   // each chunk is proportional to the number of unassigned iterations divided
1802   // by the number of threads in the team, decreasing to 1. For a chunk_size
1803   // with value k (greater than 1), the size of each chunk is determined in the
1804   // same way, with the restriction that the chunks do not contain fewer than k
1805   // iterations (except for the last chunk to be assigned, which may have fewer
1806   // than k iterations).
1807   //
1808   // When schedule(auto) is specified, the decision regarding scheduling is
1809   // delegated to the compiler and/or runtime system. The programmer gives the
1810   // implementation the freedom to choose any possible mapping of iterations to
1811   // threads in the team.
1812   //
1813   // When schedule(runtime) is specified, the decision regarding scheduling is
1814   // deferred until run time, and the schedule and chunk size are taken from the
1815   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1816   // implementation defined.
1817   //
1818   // while(__kmpc_dispatch_next(&LB, &UB)) {
1819   //   idx = LB;
1820   //   while (idx <= UB) { BODY; ++idx;
1821   //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1822   //   } // inner loop
1823   // }
1824   //
1825   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1826   // When schedule(static, chunk_size) is specified, iterations are divided into
1827   // chunks of size chunk_size, and the chunks are assigned to the threads in
1828   // the team in a round-robin fashion in the order of the thread number.
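  // For example (illustrative): with schedule(static, 2), 8 iterations and
  // 2 threads, thread 0 executes chunks {0,1} and {4,5} while thread 1
  // executes {2,3} and {6,7}.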
1829   //
1830   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1831   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1832   //   LB = LB + ST;
1833   //   UB = UB + ST;
1834   // }
1835   //
1836
1837   const Expr *IVExpr = S.getIterationVariable();
1838   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1839   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1840
1841   if (DynamicOrOrdered) {
1842     llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
1843     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1844                            IVSigned, Ordered, UBVal, Chunk);
1845   } else {
1846     RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
1847                          Ordered, IL, LB, UB, ST, Chunk);
1848   }
1849
1850   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB,
1851                    ST, IL, Chunk);
1852 }
1853
1854 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1855     OpenMPDistScheduleClauseKind ScheduleKind,
1856     const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
1857     Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
1858
1859   auto &RT = CGM.getOpenMPRuntime();
1860
1861   // Emit outer loop.
1862   // Same behavior as an OMPForOuterLoop, except that the schedule cannot
1863   // be dynamic.
1864   //
1865
1866   const Expr *IVExpr = S.getIterationVariable();
1867   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1868   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1869
1870   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
1871                               IVSize, IVSigned, /* Ordered = */ false,
1872                               IL, LB, UB, ST, Chunk);
1873
1874   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false,
1875                    S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk);
1876 }
1877
1878 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
1879     const OMPDistributeParallelForDirective &S) {
1880   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1881   CGM.getOpenMPRuntime().emitInlinedDirective(
1882       *this, OMPD_distribute_parallel_for,
1883       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1884         OMPLoopScope PreInitScope(CGF, S);
1885         CGF.EmitStmt(
1886             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1887       });
1888 }
1889
1890 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
1891     const OMPDistributeParallelForSimdDirective &S) {
1892   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1893   CGM.getOpenMPRuntime().emitInlinedDirective(
1894       *this, OMPD_distribute_parallel_for_simd,
1895       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1896         OMPLoopScope PreInitScope(CGF, S);
1897         CGF.EmitStmt(
1898             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1899       });
1900 }
1901
1902 void CodeGenFunction::EmitOMPDistributeSimdDirective(
1903     const OMPDistributeSimdDirective &S) {
1904   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1905   CGM.getOpenMPRuntime().emitInlinedDirective(
1906       *this, OMPD_distribute_simd,
1907       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1908         OMPLoopScope PreInitScope(CGF, S);
1909         CGF.EmitStmt(
1910             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1911       });
1912 }
1913
1914 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
1915     const OMPTargetParallelForSimdDirective &S) {
1916   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1917   CGM.getOpenMPRuntime().emitInlinedDirective(
1918       *this, OMPD_target_parallel_for_simd,
1919       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1920         OMPLoopScope
PreInitScope(CGF, S); 1921 CGF.EmitStmt( 1922 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1923 }); 1924 } 1925 1926 void CodeGenFunction::EmitOMPTargetSimdDirective( 1927 const OMPTargetSimdDirective &S) { 1928 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1929 CGM.getOpenMPRuntime().emitInlinedDirective( 1930 *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1931 OMPLoopScope PreInitScope(CGF, S); 1932 CGF.EmitStmt( 1933 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1934 }); 1935 } 1936 1937 void CodeGenFunction::EmitOMPTeamsDistributeDirective( 1938 const OMPTeamsDistributeDirective &S) { 1939 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1940 CGM.getOpenMPRuntime().emitInlinedDirective( 1941 *this, OMPD_teams_distribute, 1942 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1943 OMPLoopScope PreInitScope(CGF, S); 1944 CGF.EmitStmt( 1945 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1946 }); 1947 } 1948 1949 /// \brief Emit a helper variable and return corresponding lvalue. 1950 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 1951 const DeclRefExpr *Helper) { 1952 auto VDecl = cast<VarDecl>(Helper->getDecl()); 1953 CGF.EmitVarDecl(*VDecl); 1954 return CGF.EmitLValue(Helper); 1955 } 1956 1957 namespace { 1958 struct ScheduleKindModifiersTy { 1959 OpenMPScheduleClauseKind Kind; 1960 OpenMPScheduleClauseModifier M1; 1961 OpenMPScheduleClauseModifier M2; 1962 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, 1963 OpenMPScheduleClauseModifier M1, 1964 OpenMPScheduleClauseModifier M2) 1965 : Kind(Kind), M1(M1), M2(M2) {} 1966 }; 1967 } // namespace 1968 1969 bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { 1970 // Emit the loop iteration variable. 1971 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 1972 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 1973 EmitVarDecl(*IVDecl); 1974 1975 // Emit the iterations count variable. 1976 // If it is not a variable, Sema decided to calculate iterations count on each 1977 // iteration (e.g., it is foldable into a constant). 1978 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1979 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1980 // Emit calculation of the iterations count. 1981 EmitIgnoredExpr(S.getCalcLastIteration()); 1982 } 1983 1984 auto &RT = CGM.getOpenMPRuntime(); 1985 1986 bool HasLastprivateClause; 1987 // Check pre-condition. 1988 { 1989 OMPLoopScope PreInitScope(*this, S); 1990 // Skip the entire loop if we don't meet the precondition. 1991 // If the condition constant folds and can be elided, avoid emitting the 1992 // whole loop. 1993 bool CondConstant; 1994 llvm::BasicBlock *ContBlock = nullptr; 1995 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1996 if (!CondConstant) 1997 return false; 1998 } else { 1999 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2000 ContBlock = createBasicBlock("omp.precond.end"); 2001 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2002 getProfileCount(&S)); 2003 EmitBlock(ThenBlock); 2004 incrementProfileCounter(&S); 2005 } 2006 2007 bool Ordered = false; 2008 if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 2009 if (OrderedClause->getNumForLoops()) 2010 RT.emitDoacrossInit(*this, S); 2011 else 2012 Ordered = true; 2013 } 2014 2015 llvm::DenseSet<const Expr *> EmittedFinals; 2016 emitAlignedClause(*this, S); 2017 EmitOMPLinearClauseInit(S); 2018 // Emit helper vars inits. 
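    // (LB and UB hold the lower/upper bounds of the current chunk, ST the
    // stride between chunks, and IL the is-last-iteration flag consumed by
    // the lastprivate finalization below.)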
2019 LValue LB = 2020 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); 2021 LValue UB = 2022 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); 2023 LValue ST = 2024 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2025 LValue IL = 2026 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2027 2028 // Emit 'then' code. 2029 { 2030 OMPPrivateScope LoopScope(*this); 2031 if (EmitOMPFirstprivateClause(S, LoopScope)) { 2032 // Emit implicit barrier to synchronize threads and avoid data races on 2033 // initialization of firstprivate variables and post-update of 2034 // lastprivate variables. 2035 CGM.getOpenMPRuntime().emitBarrierCall( 2036 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2037 /*ForceSimpleCall=*/true); 2038 } 2039 EmitOMPPrivateClause(S, LoopScope); 2040 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2041 EmitOMPReductionClauseInit(S, LoopScope); 2042 EmitOMPPrivateLoopCounters(S, LoopScope); 2043 EmitOMPLinearClause(S, LoopScope); 2044 (void)LoopScope.Privatize(); 2045 2046 // Detect the loop schedule kind and chunk. 2047 llvm::Value *Chunk = nullptr; 2048 OpenMPScheduleTy ScheduleKind; 2049 if (auto *C = S.getSingleClause<OMPScheduleClause>()) { 2050 ScheduleKind.Schedule = C->getScheduleKind(); 2051 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2052 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2053 if (const auto *Ch = C->getChunkSize()) { 2054 Chunk = EmitScalarExpr(Ch); 2055 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2056 S.getIterationVariable()->getType(), 2057 S.getLocStart()); 2058 } 2059 } 2060 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2061 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2062 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2063 // If the static schedule kind is specified or if the ordered clause is 2064 // specified, and if no monotonic modifier is specified, the effect will 2065 // be as if the monotonic modifier was specified. 2066 if (RT.isStaticNonchunked(ScheduleKind.Schedule, 2067 /* Chunked */ Chunk != nullptr) && 2068 !Ordered) { 2069 if (isOpenMPSimdDirective(S.getDirectiveKind())) 2070 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2071 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2072 // When no chunk_size is specified, the iteration space is divided into 2073 // chunks that are approximately equal in size, and at most one chunk is 2074 // distributed to each thread. Note that the size of the chunks is 2075 // unspecified in this case. 2076 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, 2077 IVSize, IVSigned, Ordered, 2078 IL.getAddress(), LB.getAddress(), 2079 UB.getAddress(), ST.getAddress()); 2080 auto LoopExit = 2081 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2082 // UB = min(UB, GlobalUB); 2083 EmitIgnoredExpr(S.getEnsureUpperBound()); 2084 // IV = LB; 2085 EmitIgnoredExpr(S.getInit()); 2086 // while (idx <= UB) { BODY; ++idx; } 2087 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 2088 S.getInc(), 2089 [&S, LoopExit](CodeGenFunction &CGF) { 2090 CGF.EmitOMPLoopBody(S, LoopExit); 2091 CGF.EmitStopPoint(&S); 2092 }, 2093 [](CodeGenFunction &) {}); 2094 EmitBlock(LoopExit.getBlock()); 2095 // Tell the runtime we are done. 
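        // (Presumably a __kmpc_for_static_fini call in the host runtime;
        // the actual entry point is chosen by emitForStaticFinish in
        // CGOpenMPRuntime, not spelled out here.)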
2096 RT.emitForStaticFinish(*this, S.getLocStart()); 2097 } else { 2098 const bool IsMonotonic = 2099 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || 2100 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || 2101 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 2102 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 2103 // Emit the outer loop, which requests its work chunk [LB..UB] from 2104 // runtime and runs the inner loop to process it. 2105 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 2106 LB.getAddress(), UB.getAddress(), ST.getAddress(), 2107 IL.getAddress(), Chunk); 2108 } 2109 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2110 EmitOMPSimdFinal(S, 2111 [&](CodeGenFunction &CGF) -> llvm::Value * { 2112 return CGF.Builder.CreateIsNotNull( 2113 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2114 }); 2115 } 2116 EmitOMPReductionClauseFinal(S); 2117 // Emit post-update of the reduction variables if IsLastIter != 0. 2118 emitPostUpdateForReductionClause( 2119 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2120 return CGF.Builder.CreateIsNotNull( 2121 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2122 }); 2123 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2124 if (HasLastprivateClause) 2125 EmitOMPLastprivateClauseFinal( 2126 S, isOpenMPSimdDirective(S.getDirectiveKind()), 2127 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 2128 } 2129 EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2130 return CGF.Builder.CreateIsNotNull( 2131 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2132 }); 2133 // We're now done with the loop, so jump to the continuation block. 2134 if (ContBlock) { 2135 EmitBranch(ContBlock); 2136 EmitBlock(ContBlock, true); 2137 } 2138 } 2139 return HasLastprivateClause; 2140 } 2141 2142 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 2143 bool HasLastprivates = false; 2144 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2145 PrePostActionTy &) { 2146 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 2147 }; 2148 { 2149 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2150 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 2151 S.hasCancel()); 2152 } 2153 2154 // Emit an implicit barrier at the end. 2155 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2156 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2157 } 2158 } 2159 2160 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 2161 bool HasLastprivates = false; 2162 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2163 PrePostActionTy &) { 2164 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 2165 }; 2166 { 2167 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2168 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2169 } 2170 2171 // Emit an implicit barrier at the end. 
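  // The barrier is elided only when a 'nowait' clause is present and no
  // lastprivates were emitted, since their final values must be visible to
  // all threads before they leave the construct.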
2172 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2173 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2174 } 2175 } 2176 2177 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 2178 const Twine &Name, 2179 llvm::Value *Init = nullptr) { 2180 auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 2181 if (Init) 2182 CGF.EmitScalarInit(Init, LVal); 2183 return LVal; 2184 } 2185 2186 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 2187 auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); 2188 auto *CS = dyn_cast<CompoundStmt>(Stmt); 2189 bool HasLastprivates = false; 2190 auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF, 2191 PrePostActionTy &) { 2192 auto &C = CGF.CGM.getContext(); 2193 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2194 // Emit helper vars inits. 2195 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 2196 CGF.Builder.getInt32(0)); 2197 auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1) 2198 : CGF.Builder.getInt32(0); 2199 LValue UB = 2200 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 2201 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 2202 CGF.Builder.getInt32(1)); 2203 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 2204 CGF.Builder.getInt32(0)); 2205 // Loop counter. 2206 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 2207 OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2208 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 2209 OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2210 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 2211 // Generate condition for loop. 2212 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 2213 OK_Ordinary, S.getLocStart(), 2214 /*fpContractable=*/false); 2215 // Increment for loop counter. 2216 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 2217 S.getLocStart()); 2218 auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) { 2219 // Iterate through all sections and emit a switch construct: 2220 // switch (IV) { 2221 // case 0: 2222 // <SectionStmt[0]>; 2223 // break; 2224 // ... 2225 // case <NumSection> - 1: 2226 // <SectionStmt[<NumSection> - 1]>; 2227 // break; 2228 // } 2229 // .omp.sections.exit: 2230 auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 2231 auto *SwitchStmt = CGF.Builder.CreateSwitch( 2232 CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, 2233 CS == nullptr ? 
1 : CS->size()); 2234 if (CS) { 2235 unsigned CaseNumber = 0; 2236 for (auto *SubStmt : CS->children()) { 2237 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2238 CGF.EmitBlock(CaseBB); 2239 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 2240 CGF.EmitStmt(SubStmt); 2241 CGF.EmitBranch(ExitBB); 2242 ++CaseNumber; 2243 } 2244 } else { 2245 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2246 CGF.EmitBlock(CaseBB); 2247 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 2248 CGF.EmitStmt(Stmt); 2249 CGF.EmitBranch(ExitBB); 2250 } 2251 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 2252 }; 2253 2254 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2255 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 2256 // Emit implicit barrier to synchronize threads and avoid data races on 2257 // initialization of firstprivate variables and post-update of lastprivate 2258 // variables. 2259 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 2260 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2261 /*ForceSimpleCall=*/true); 2262 } 2263 CGF.EmitOMPPrivateClause(S, LoopScope); 2264 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2265 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2266 (void)LoopScope.Privatize(); 2267 2268 // Emit static non-chunked loop. 2269 OpenMPScheduleTy ScheduleKind; 2270 ScheduleKind.Schedule = OMPC_SCHEDULE_static; 2271 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 2272 CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32, 2273 /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(), 2274 UB.getAddress(), ST.getAddress()); 2275 // UB = min(UB, GlobalUB); 2276 auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); 2277 auto *MinUBGlobalUB = CGF.Builder.CreateSelect( 2278 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 2279 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 2280 // IV = LB; 2281 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); 2282 // while (idx <= UB) { BODY; ++idx; } 2283 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, 2284 [](CodeGenFunction &) {}); 2285 // Tell the runtime we are done. 2286 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart()); 2287 CGF.EmitOMPReductionClauseFinal(S); 2288 // Emit post-update of the reduction variables if IsLastIter != 0. 2289 emitPostUpdateForReductionClause( 2290 CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2291 return CGF.Builder.CreateIsNotNull( 2292 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2293 }); 2294 2295 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2296 if (HasLastprivates) 2297 CGF.EmitOMPLastprivateClauseFinal( 2298 S, /*NoFinals=*/false, 2299 CGF.Builder.CreateIsNotNull( 2300 CGF.EmitLoadOfScalar(IL, S.getLocStart()))); 2301 }; 2302 2303 bool HasCancel = false; 2304 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 2305 HasCancel = OSD->hasCancel(); 2306 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 2307 HasCancel = OPSD->hasCancel(); 2308 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 2309 HasCancel); 2310 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 2311 // clause. Otherwise the barrier will be generated by the codegen for the 2312 // directive. 2313 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 2314 // Emit implicit barrier to synchronize threads and avoid data races on 2315 // initialization of firstprivate variables. 
2316 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2317 OMPD_unknown); 2318 } 2319 } 2320 2321 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 2322 { 2323 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2324 EmitSections(S); 2325 } 2326 // Emit an implicit barrier at the end. 2327 if (!S.getSingleClause<OMPNowaitClause>()) { 2328 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2329 OMPD_sections); 2330 } 2331 } 2332 2333 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 2334 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2335 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2336 }; 2337 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2338 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 2339 S.hasCancel()); 2340 } 2341 2342 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 2343 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 2344 llvm::SmallVector<const Expr *, 8> DestExprs; 2345 llvm::SmallVector<const Expr *, 8> SrcExprs; 2346 llvm::SmallVector<const Expr *, 8> AssignmentOps; 2347 // Check if there are any 'copyprivate' clauses associated with this 2348 // 'single' construct. 2349 // Build a list of copyprivate variables along with helper expressions 2350 // (<source>, <destination>, <destination>=<source> expressions) 2351 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 2352 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 2353 DestExprs.append(C->destination_exprs().begin(), 2354 C->destination_exprs().end()); 2355 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 2356 AssignmentOps.append(C->assignment_ops().begin(), 2357 C->assignment_ops().end()); 2358 } 2359 // Emit code for 'single' region along with 'copyprivate' clauses 2360 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2361 Action.Enter(CGF); 2362 OMPPrivateScope SingleScope(CGF); 2363 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 2364 CGF.EmitOMPPrivateClause(S, SingleScope); 2365 (void)SingleScope.Privatize(); 2366 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2367 }; 2368 { 2369 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2370 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 2371 CopyprivateVars, DestExprs, 2372 SrcExprs, AssignmentOps); 2373 } 2374 // Emit an implicit barrier at the end (to avoid data race on firstprivate 2375 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 2376 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 2377 CGM.getOpenMPRuntime().emitBarrierCall( 2378 *this, S.getLocStart(), 2379 S.getSingleClause<OMPNowaitClause>() ? 
OMPD_unknown : OMPD_single); 2380 } 2381 } 2382 2383 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 2384 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2385 Action.Enter(CGF); 2386 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2387 }; 2388 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2389 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 2390 } 2391 2392 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 2393 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2394 Action.Enter(CGF); 2395 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2396 }; 2397 Expr *Hint = nullptr; 2398 if (auto *HintClause = S.getSingleClause<OMPHintClause>()) 2399 Hint = HintClause->getHint(); 2400 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2401 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 2402 S.getDirectiveName().getAsString(), 2403 CodeGen, S.getLocStart(), Hint); 2404 } 2405 2406 void CodeGenFunction::EmitOMPParallelForDirective( 2407 const OMPParallelForDirective &S) { 2408 // Emit directive as a combined directive that consists of two implicit 2409 // directives: 'parallel' with 'for' directive. 2410 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2411 CGF.EmitOMPWorksharingLoop(S); 2412 }; 2413 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); 2414 } 2415 2416 void CodeGenFunction::EmitOMPParallelForSimdDirective( 2417 const OMPParallelForSimdDirective &S) { 2418 // Emit directive as a combined directive that consists of two implicit 2419 // directives: 'parallel' with 'for' directive. 2420 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2421 CGF.EmitOMPWorksharingLoop(S); 2422 }; 2423 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); 2424 } 2425 2426 void CodeGenFunction::EmitOMPParallelSectionsDirective( 2427 const OMPParallelSectionsDirective &S) { 2428 // Emit directive as a combined directive that consists of two implicit 2429 // directives: 'parallel' with 'sections' directive. 2430 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2431 CGF.EmitSections(S); 2432 }; 2433 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); 2434 } 2435 2436 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, 2437 const RegionCodeGenTy &BodyGen, 2438 const TaskGenTy &TaskGen, 2439 OMPTaskDataTy &Data) { 2440 // Emit outlined function for task construct. 2441 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2442 auto *I = CS->getCapturedDecl()->param_begin(); 2443 auto *PartId = std::next(I); 2444 auto *TaskT = std::next(I, 4); 2445 // Check if the task is final 2446 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 2447 // If the condition constant folds and can be elided, try to avoid emitting 2448 // the condition and the dead arm of the if/else. 2449 auto *Cond = Clause->getCondition(); 2450 bool CondConstant; 2451 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 2452 Data.Final.setInt(CondConstant); 2453 else 2454 Data.Final.setPointer(EvaluateExprAsBool(Cond)); 2455 } else { 2456 // By default the task is not final. 2457 Data.Final.setInt(/*IntVal=*/false); 2458 } 2459 // Check if the task has 'priority' clause. 
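  // For example (illustrative): '#pragma omp task priority(2)' makes the
  // clause below yield 2, converted to a signed 32-bit integer.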
2460 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 2461 auto *Prio = Clause->getPriority(); 2462 Data.Priority.setInt(/*IntVal=*/true); 2463 Data.Priority.setPointer(EmitScalarConversion( 2464 EmitScalarExpr(Prio), Prio->getType(), 2465 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 2466 Prio->getExprLoc())); 2467 } 2468 // The first function argument for tasks is a thread id, the second one is a 2469 // part id (0 for tied tasks, >=0 for untied task). 2470 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 2471 // Get list of private variables. 2472 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 2473 auto IRef = C->varlist_begin(); 2474 for (auto *IInit : C->private_copies()) { 2475 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2476 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2477 Data.PrivateVars.push_back(*IRef); 2478 Data.PrivateCopies.push_back(IInit); 2479 } 2480 ++IRef; 2481 } 2482 } 2483 EmittedAsPrivate.clear(); 2484 // Get list of firstprivate variables. 2485 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 2486 auto IRef = C->varlist_begin(); 2487 auto IElemInitRef = C->inits().begin(); 2488 for (auto *IInit : C->private_copies()) { 2489 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2490 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2491 Data.FirstprivateVars.push_back(*IRef); 2492 Data.FirstprivateCopies.push_back(IInit); 2493 Data.FirstprivateInits.push_back(*IElemInitRef); 2494 } 2495 ++IRef; 2496 ++IElemInitRef; 2497 } 2498 } 2499 // Get list of lastprivate variables (for taskloops). 2500 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 2501 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 2502 auto IRef = C->varlist_begin(); 2503 auto ID = C->destination_exprs().begin(); 2504 for (auto *IInit : C->private_copies()) { 2505 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2506 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2507 Data.LastprivateVars.push_back(*IRef); 2508 Data.LastprivateCopies.push_back(IInit); 2509 } 2510 LastprivateDstsOrigs.insert( 2511 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 2512 cast<DeclRefExpr>(*IRef)}); 2513 ++IRef; 2514 ++ID; 2515 } 2516 } 2517 // Build list of dependences. 2518 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) 2519 for (auto *IRef : C->varlists()) 2520 Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); 2521 auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen, &LastprivateDstsOrigs]( 2522 CodeGenFunction &CGF, PrePostActionTy &Action) { 2523 // Set proper addresses for generated private copies. 2524 OMPPrivateScope Scope(CGF); 2525 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || 2526 !Data.LastprivateVars.empty()) { 2527 auto *CopyFn = CGF.Builder.CreateLoad( 2528 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); 2529 auto *PrivatesPtr = CGF.Builder.CreateLoad( 2530 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); 2531 // Map privates. 
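      // The copy function (captured parameter 3) is called with the privates
      // pointer (captured parameter 2) plus one out-pointer per private,
      // firstprivate and lastprivate variable; it is expected to fill each
      // out-pointer with the address of the matching copy in the task's
      // private data block, which is then loaded back below.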
2532 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 2533 llvm::SmallVector<llvm::Value *, 16> CallArgs; 2534 CallArgs.push_back(PrivatesPtr); 2535 for (auto *E : Data.PrivateVars) { 2536 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2537 Address PrivatePtr = CGF.CreateMemTemp( 2538 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); 2539 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2540 CallArgs.push_back(PrivatePtr.getPointer()); 2541 } 2542 for (auto *E : Data.FirstprivateVars) { 2543 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2544 Address PrivatePtr = 2545 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 2546 ".firstpriv.ptr.addr"); 2547 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2548 CallArgs.push_back(PrivatePtr.getPointer()); 2549 } 2550 for (auto *E : Data.LastprivateVars) { 2551 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2552 Address PrivatePtr = 2553 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 2554 ".lastpriv.ptr.addr"); 2555 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2556 CallArgs.push_back(PrivatePtr.getPointer()); 2557 } 2558 CGF.EmitRuntimeCall(CopyFn, CallArgs); 2559 for (auto &&Pair : LastprivateDstsOrigs) { 2560 auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); 2561 DeclRefExpr DRE( 2562 const_cast<VarDecl *>(OrigVD), 2563 /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup( 2564 OrigVD) != nullptr, 2565 Pair.second->getType(), VK_LValue, Pair.second->getExprLoc()); 2566 Scope.addPrivate(Pair.first, [&CGF, &DRE]() { 2567 return CGF.EmitLValue(&DRE).getAddress(); 2568 }); 2569 } 2570 for (auto &&Pair : PrivatePtrs) { 2571 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 2572 CGF.getContext().getDeclAlign(Pair.first)); 2573 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 2574 } 2575 } 2576 (void)Scope.Privatize(); 2577 2578 Action.Enter(CGF); 2579 BodyGen(CGF); 2580 }; 2581 auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 2582 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, 2583 Data.NumberOfParts); 2584 OMPLexicalScope Scope(*this, S); 2585 TaskGen(*this, OutlinedFn, Data); 2586 } 2587 2588 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 2589 // Emit outlined function for task construct. 2590 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2591 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 2592 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 2593 const Expr *IfCond = nullptr; 2594 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2595 if (C->getNameModifier() == OMPD_unknown || 2596 C->getNameModifier() == OMPD_task) { 2597 IfCond = C->getCondition(); 2598 break; 2599 } 2600 } 2601 2602 OMPTaskDataTy Data; 2603 // Check if we should emit tied or untied task. 
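  // A task is tied unless an 'untied' clause is present, e.g.
  // '#pragma omp task untied'.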
2604 Data.Tied = !S.getSingleClause<OMPUntiedClause>(); 2605 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 2606 CGF.EmitStmt(CS->getCapturedStmt()); 2607 }; 2608 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 2609 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 2610 const OMPTaskDataTy &Data) { 2611 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn, 2612 SharedsTy, CapturedStruct, IfCond, 2613 Data); 2614 }; 2615 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 2616 } 2617 2618 void CodeGenFunction::EmitOMPTaskyieldDirective( 2619 const OMPTaskyieldDirective &S) { 2620 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); 2621 } 2622 2623 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 2624 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); 2625 } 2626 2627 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 2628 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart()); 2629 } 2630 2631 void CodeGenFunction::EmitOMPTaskgroupDirective( 2632 const OMPTaskgroupDirective &S) { 2633 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2634 Action.Enter(CGF); 2635 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2636 }; 2637 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2638 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); 2639 } 2640 2641 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 2642 CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { 2643 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) { 2644 return llvm::makeArrayRef(FlushClause->varlist_begin(), 2645 FlushClause->varlist_end()); 2646 } 2647 return llvm::None; 2648 }(), S.getLocStart()); 2649 } 2650 2651 void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { 2652 // Emit the loop iteration variable. 2653 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2654 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2655 EmitVarDecl(*IVDecl); 2656 2657 // Emit the iterations count variable. 2658 // If it is not a variable, Sema decided to calculate iterations count on each 2659 // iteration (e.g., it is foldable into a constant). 2660 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2661 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2662 // Emit calculation of the iterations count. 2663 EmitIgnoredExpr(S.getCalcLastIteration()); 2664 } 2665 2666 auto &RT = CGM.getOpenMPRuntime(); 2667 2668 // Check pre-condition. 2669 { 2670 OMPLoopScope PreInitScope(*this, S); 2671 // Skip the entire loop if we don't meet the precondition. 2672 // If the condition constant folds and can be elided, avoid emitting the 2673 // whole loop. 2674 bool CondConstant; 2675 llvm::BasicBlock *ContBlock = nullptr; 2676 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2677 if (!CondConstant) 2678 return; 2679 } else { 2680 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2681 ContBlock = createBasicBlock("omp.precond.end"); 2682 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2683 getProfileCount(&S)); 2684 EmitBlock(ThenBlock); 2685 incrementProfileCounter(&S); 2686 } 2687 2688 // Emit 'then' code. 2689 { 2690 // Emit helper vars inits. 
2691 LValue LB = 2692 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); 2693 LValue UB = 2694 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); 2695 LValue ST = 2696 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2697 LValue IL = 2698 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2699 2700 OMPPrivateScope LoopScope(*this); 2701 EmitOMPPrivateLoopCounters(S, LoopScope); 2702 (void)LoopScope.Privatize(); 2703 2704 // Detect the distribute schedule kind and chunk. 2705 llvm::Value *Chunk = nullptr; 2706 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; 2707 if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) { 2708 ScheduleKind = C->getDistScheduleKind(); 2709 if (const auto *Ch = C->getChunkSize()) { 2710 Chunk = EmitScalarExpr(Ch); 2711 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2712 S.getIterationVariable()->getType(), 2713 S.getLocStart()); 2714 } 2715 } 2716 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2717 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2718 2719 // OpenMP [2.10.8, distribute Construct, Description] 2720 // If dist_schedule is specified, kind must be static. If specified, 2721 // iterations are divided into chunks of size chunk_size, chunks are 2722 // assigned to the teams of the league in a round-robin fashion in the 2723 // order of the team number. When no chunk_size is specified, the 2724 // iteration space is divided into chunks that are approximately equal 2725 // in size, and at most one chunk is distributed to each team of the 2726 // league. The size of the chunks is unspecified in this case. 2727 if (RT.isStaticNonchunked(ScheduleKind, 2728 /* Chunked */ Chunk != nullptr)) { 2729 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, 2730 IVSize, IVSigned, /* Ordered = */ false, 2731 IL.getAddress(), LB.getAddress(), 2732 UB.getAddress(), ST.getAddress()); 2733 auto LoopExit = 2734 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2735 // UB = min(UB, GlobalUB); 2736 EmitIgnoredExpr(S.getEnsureUpperBound()); 2737 // IV = LB; 2738 EmitIgnoredExpr(S.getInit()); 2739 // while (idx <= UB) { BODY; ++idx; } 2740 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 2741 S.getInc(), 2742 [&S, LoopExit](CodeGenFunction &CGF) { 2743 CGF.EmitOMPLoopBody(S, LoopExit); 2744 CGF.EmitStopPoint(&S); 2745 }, 2746 [](CodeGenFunction &) {}); 2747 EmitBlock(LoopExit.getBlock()); 2748 // Tell the runtime we are done. 2749 RT.emitForStaticFinish(*this, S.getLocStart()); 2750 } else { 2751 // Emit the outer loop, which requests its work chunk [LB..UB] from 2752 // runtime and runs the inner loop to process it. 2753 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, 2754 LB.getAddress(), UB.getAddress(), ST.getAddress(), 2755 IL.getAddress(), Chunk); 2756 } 2757 } 2758 2759 // We're now done with the loop, so jump to the continuation block. 
2760 if (ContBlock) { 2761 EmitBranch(ContBlock); 2762 EmitBlock(ContBlock, true); 2763 } 2764 } 2765 } 2766 2767 void CodeGenFunction::EmitOMPDistributeDirective( 2768 const OMPDistributeDirective &S) { 2769 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2770 CGF.EmitOMPDistributeLoop(S); 2771 }; 2772 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2773 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 2774 false); 2775 } 2776 2777 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 2778 const CapturedStmt *S) { 2779 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 2780 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 2781 CGF.CapturedStmtInfo = &CapStmtInfo; 2782 auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); 2783 Fn->addFnAttr(llvm::Attribute::NoInline); 2784 return Fn; 2785 } 2786 2787 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 2788 if (!S.getAssociatedStmt()) { 2789 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) 2790 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); 2791 return; 2792 } 2793 auto *C = S.getSingleClause<OMPSIMDClause>(); 2794 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, 2795 PrePostActionTy &Action) { 2796 if (C) { 2797 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2798 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 2799 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 2800 auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); 2801 CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); 2802 } else { 2803 Action.Enter(CGF); 2804 CGF.EmitStmt( 2805 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2806 } 2807 }; 2808 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2809 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); 2810 } 2811 2812 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 2813 QualType SrcType, QualType DestType, 2814 SourceLocation Loc) { 2815 assert(CGF.hasScalarEvaluationKind(DestType) && 2816 "DestType must have scalar evaluation kind."); 2817 assert(!Val.isAggregate() && "Must be a scalar or complex."); 2818 return Val.isScalar() 2819 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, 2820 Loc) 2821 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 2822 DestType, Loc); 2823 } 2824 2825 static CodeGenFunction::ComplexPairTy 2826 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 2827 QualType DestType, SourceLocation Loc) { 2828 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 2829 "DestType must have complex evaluation kind."); 2830 CodeGenFunction::ComplexPairTy ComplexVal; 2831 if (Val.isScalar()) { 2832 // Convert the input element to the element type of the complex. 
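    // E.g. (illustrative) an 'int' source becomes the real part of a
    // '_Complex double' result, with the imaginary part zero-initialized.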
2833 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 2834 auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 2835 DestElementType, Loc); 2836 ComplexVal = CodeGenFunction::ComplexPairTy( 2837 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 2838 } else { 2839 assert(Val.isComplex() && "Must be a scalar or complex."); 2840 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 2841 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 2842 ComplexVal.first = CGF.EmitScalarConversion( 2843 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 2844 ComplexVal.second = CGF.EmitScalarConversion( 2845 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 2846 } 2847 return ComplexVal; 2848 } 2849 2850 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 2851 LValue LVal, RValue RVal) { 2852 if (LVal.isGlobalReg()) { 2853 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 2854 } else { 2855 CGF.EmitAtomicStore(RVal, LVal, 2856 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 2857 : llvm::AtomicOrdering::Monotonic, 2858 LVal.isVolatile(), /*IsInit=*/false); 2859 } 2860 } 2861 2862 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, 2863 QualType RValTy, SourceLocation Loc) { 2864 switch (getEvaluationKind(LVal.getType())) { 2865 case TEK_Scalar: 2866 EmitStoreThroughLValue(RValue::get(convertToScalarValue( 2867 *this, RVal, RValTy, LVal.getType(), Loc)), 2868 LVal); 2869 break; 2870 case TEK_Complex: 2871 EmitStoreOfComplex( 2872 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, 2873 /*isInit=*/false); 2874 break; 2875 case TEK_Aggregate: 2876 llvm_unreachable("Must be a scalar or complex."); 2877 } 2878 } 2879 2880 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 2881 const Expr *X, const Expr *V, 2882 SourceLocation Loc) { 2883 // v = x; 2884 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 2885 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 2886 LValue XLValue = CGF.EmitLValue(X); 2887 LValue VLValue = CGF.EmitLValue(V); 2888 RValue Res = XLValue.isGlobalReg() 2889 ? CGF.EmitLoadOfLValue(XLValue, Loc) 2890 : CGF.EmitAtomicLoad( 2891 XLValue, Loc, 2892 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 2893 : llvm::AtomicOrdering::Monotonic, 2894 XLValue.isVolatile()); 2895 // OpenMP, 2.12.6, atomic Construct 2896 // Any atomic construct with a seq_cst clause forces the atomically 2897 // performed operation to include an implicit flush operation without a 2898 // list. 2899 if (IsSeqCst) 2900 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2901 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); 2902 } 2903 2904 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 2905 const Expr *X, const Expr *E, 2906 SourceLocation Loc) { 2907 // x = expr; 2908 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 2909 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); 2910 // OpenMP, 2.12.6, atomic Construct 2911 // Any atomic construct with a seq_cst clause forces the atomically 2912 // performed operation to include an implicit flush operation without a 2913 // list. 
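  // For example (illustrative): '#pragma omp atomic write seq_cst' emits
  // the atomic store above followed by a flush without a list.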

static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
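
// A minimal user-code sketch of the form lowered above (names illustrative):
//
//   #pragma omp atomic write
//   x = expr; // 'expr' is evaluated non-atomically; the store to 'x' is
//             // atomic.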

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow an atomicrmw instruction only if 'x' and 'update' are integer
  // values, the lvalue for the 'x' expression is simple, and atomics are
  // supported for the given type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
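
// A minimal user-code sketch of updates the helper above can lower to a
// single 'atomicrmw' instruction, assuming 'x' is a simple integer lvalue
// with target-supported atomics (names illustrative):
//
//   #pragma omp atomic update
//   x += 1; // -> atomicrmw add
//   #pragma omp atomic update
//   x = x | mask; // -> atomicrmw or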

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform the compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
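
// A minimal user-code sketch of the IsXLHSInRHSPart distinction used above
// (names illustrative); the operand order matters for non-commutative
// operators:
//
//   #pragma omp atomic update
//   x = x - expr; // 'x' is the LHS of the RHS: may become atomicrmw sub.
//   #pragma omp atomic update
//   x = expr - x; // 'x' is the RHS of the RHS: needs the compare-and-swap
//                 // fallback.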

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval binop expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr;
    // x = expr Op x; -> expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use the old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide the new value, so evaluate it using
        // the old value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise a simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
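
// A minimal user-code sketch of the capture forms dispatched above (names
// illustrative):
//
//   #pragma omp atomic capture
//   v = x++; // postfix update: 'v' receives the old value of 'x'.
//   #pragma omp atomic capture
//   { x += expr; v = x; } // 'v' receives the updated value of 'x'.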

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find the first clause (skip the seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
    enterFullExpression(EWC);
  }
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const auto *C : Compound->body()) {
      if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
        enterFullExpression(EWC);
      }
    }
  }

  auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.EmitStopPoint(CS);
    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
                      S.getV(), S.getExpr(), S.getUpdateExpr(),
                      S.isXLHSInRHSPart(), S.getLocStart());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}

std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/>
CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
    CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName,
    bool IsOffloadEntry) {
  llvm::Function *OutlinedFn = nullptr;
  llvm::Constant *OutlinedFnID = nullptr;
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    (void)PrivateScope.Privatize();

    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen);
  return std::make_pair(OutlinedFn, OutlinedFnID);
}
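
// A minimal user-code sketch of a target region (names illustrative); the
// function outlined above becomes the device kernel, and it is registered as
// an offload entry only when offloading can actually occur:
//
//   #pragma omp target if(n > threshold) device(dev)
//   { work(); }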

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());

  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  GenerateOpenMPCapturedVars(CS, CapturedVars);

  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>()) {
    IfCond = C->getCondition();
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
    Device = C->getDevice();
  }

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl)));

  std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction(
      CGM, S, ParentName, IsOffloadEntry);
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device,
                                        CapturedVars);
}

static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOrTeamsOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const OMPTeamsDirective &TD = *cast<OMPTeamsDirective>(&S);
  const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>();
  const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getLocStart());
  }

  OMPLexicalScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
                                           CapturedVars);
}
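
// A minimal user-code sketch of the clauses consumed above (names
// illustrative):
//
//   #pragma omp target
//   #pragma omp teams num_teams(8) thread_limit(64)
//   { work(); }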

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
}

void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for);
  return BreakContinueStack.back().BreakBlock;
}
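
// A minimal user-code sketch of cancellation (names illustrative): 'cancel'
// branches to the destination computed above, and 'cancellation point' is
// where other threads observe a pending cancellation:
//
//   #pragma omp parallel
//   {
//     #pragma omp cancel parallel if(error)
//     work();
//     #pragma omp cancellation point parallel
//   }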

void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (auto PvtVarIt : C.private_copies()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get an
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // that the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it will be initialized by the
      // declaration we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable reached its purpose in the emission of
      // the previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}
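
// A minimal user-code sketch of use_device_ptr (names illustrative): inside
// the region the privatized 'p' holds the device address obtained from the
// runtime, as wired up above:
//
//   #pragma omp target data map(to: p[:n]) use_device_ptr(p)
//   launch_on_device(p, n);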

// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code
  // generation to deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers
      // if needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else
        RCG(CGF);
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inline scope: changes to the references inside the region
    // are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  // TODO: codegen for target parallel.
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  // TODO: codegen for target parallel for.
}

/// Map a loop helper variable to the address of the corresponding
/// captured-function parameter.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(
      VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
}
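
// A minimal user-code sketch of the standalone data-mapping directives
// handled above (names illustrative):
//
//   #pragma omp target enter data map(to: a[:n]) device(0)
//   process_on_device(a, n);
//   #pragma omp target exit data map(from: a[:n])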

void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}
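
// A minimal user-code sketch of the scheduling clauses consumed above (names
// illustrative): 'grainsize' bounds the iterations per task, 'num_tasks'
// fixes the number of tasks, and 'nogroup' drops the implicit taskgroup:
//
//   #pragma omp taskloop grainsize(100) nogroup
//   for (int i = 0; i < n; ++i)
//     body(i);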

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
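
// A minimal user-code sketch of the directive handled above (names
// illustrative):
//
//   #pragma omp target update from(a[:n]) if(dirty) device(dev)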