//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/DeclOpenMP.h"
#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
class OMPLexicalScope final : public CodeGenFunction::LexicalScope {
  /// Emit the declarations produced by the pre-init statements attached to the
  /// directive's clauses, so captured expressions are materialized before the
  /// construct body is emitted.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              // For decls marked OMPCaptureNoInit, allocate storage and
              // register cleanups but deliberately skip the initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  // Privatization scope for variables shared into an inlined region.
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD is already captured in the current codegen context
  /// (lambda capture field, captured statement, or an enclosing block).
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  /// \param AsInlined When true, the directive's associated captured statement
  /// is emitted inline: each captured variable is re-privatized so references
  /// inside the region resolve to the current function's addresses.
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                  bool AsInlined = false)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    emitPreInitStmt(CGF, S);
    if (AsInlined) {
      if (S.hasAssociatedStmt()) {
        auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
        for (auto &C : CS->captures()) {
          if (C.capturesVariable() || C.capturesVariableByCopy()) {
            auto *VD = C.getCapturedVar();
            // Build a temporary DeclRefExpr so EmitLValue computes the
            // address of the variable as seen from the enclosing context.
            DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                            isCapturedVar(CGF, VD) ||
                                (CGF.CapturedStmtInfo &&
                                 InlinedShareds.isGlobalVarCaptured(VD)),
                            VD->getType().getNonReferenceType(), VK_LValue,
                            SourceLocation());
            InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
              return CGF.EmitLValue(&DRE).getAddress();
            });
          }
        }
        (void)InlinedShareds.Privatize();
      }
    }
  }
};

/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expression from loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  /// Emit the pre-init declarations attached to a loop directive (e.g.
  /// captured bounds expressions) before the loop itself is emitted.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
        for (const auto *I : PreInits->decls())
          CGF.EmitVarDecl(cast<VarDecl>(*I));
      }
    }
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

} // namespace

/// Compute the size of \p Ty in bytes as an llvm::Value. For variable-length
/// arrays the dynamic extents are multiplied (with no-unsigned-wrap) into the
/// element size; a residual zero size yields a constant 0.
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}

/// Collect, in \p CapturedVars, the values to pass for each capture of the
/// captured statement \p S: VLA size values, the 'this' pointer, by-copy
/// captures as loaded scalars, and by-reference captures as addresses.
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      // Pass the previously-computed VLA size value directly.
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy())
      CapturedVars.push_back(
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal());
    else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

/// Cast a value that was smuggled through a uintptr argument back to a pointer
/// to \p DstType. If \p isReferenceType, materialize a temporary holding that
/// pointer so callers get the address *of the reference* instead.
static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), SourceLocation());
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references we need to return the address of the
  // reference instead of the reference of the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitScalarInit(RefVal, TmpLVal);
  }

  return TmpAddr;
}

/// Emit the outlined function for the captured statement of an OpenMP region:
/// builds the argument list from the captured record (re-typing non-pointer
/// by-copy captures and VLA sizes as uintptr), then emits the body with local
/// variables remapped onto the incoming arguments.
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  const RecordDecl *RD = S.getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  // Build the argument list.
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList Args;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType())
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &getContext().Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &getContext().Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getContext().getVariableArrayDecayedType(ArgType);
    Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
                                             FD->getLocation(), II, ArgType));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  // NOTE(review): ExtInfo is declared but not used below.
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F = llvm::Function::Create(
      FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
      CapturedStmtInfo->getHelperName(), &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // Generate the function.
  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt]));
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal =
        MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
                       AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      // Recover the VLA size value from the uintptr argument and record it so
      // later VLA-typed expressions can find it.
      LValue CastedArgLVal =
          MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
                                              Args[Cnt]->getName(), ArgLVal),
                         FD->getType(), AlignmentSource::Decl);
      auto *ExprArg =
          EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizeMap[VAT->getSizeExpr()] = ExprArg;
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        ArgAddr = EmitLoadOfReference(
            ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
      }
      setAddrOfLocalVar(
          Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      setAddrOfLocalVar(I->getCapturedVar(),
                        castValueFromUintptr(*this, FD->getType(),
                                             Args[Cnt]->getName(), ArgLVal,
                                             VarTy->isReferenceType()));
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue =
          EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
    }
    ++Cnt;
    ++I;
  }

  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());

  return F;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
/// Emit an element-by-element copy between two arrays of type \p OriginalType,
/// invoking \p CopyGen(dest, src) for each element pair inside an emitted
/// while-do loop over the destination range.
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI =
      Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  // Pattern-match the reduction op shape produced by Sema for user-defined
  // reductions: a call through an opaque value referencing the UDR decl.
  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Initialize \p Private from the user-defined reduction \p DRD: either run
/// the UDR's initializer with LHS/RHS remapped to \p Private/\p Original, or,
/// when the UDR has no initializer, copy a null-constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    auto *CE = cast<CallExpr>(InitOp);
    auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Remap the initializer's placeholder variables onto the private copy and
    // the original storage before evaluating the init expression.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() -> Address { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() -> Address { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No user initializer: initialize the private copy from a zero constant.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, ".init");
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// \brief Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, const Expr *Init,
                                 Address SrcAddr = Address::invalid()) {
  auto *DRD = getReductionInit(Init);
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // The source PHI is only needed when a UDR reads the original element.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope cleanups of any temporaries created by the per-element init.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (DRD && (DRD->getInitializer() || !Init)) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit a copy of \p OriginalType from \p SrcAddr to \p DestAddr using the
/// copy expression \p Copy: a memcpy for simple array assignment, an
/// element-wise loop for arrays with nontrivial copies, or the copy expression
/// itself (with \p SrcVD/\p DestVD remapped) for non-array types.
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

/// Emit initialization for the 'firstprivate' clause variables of \p D,
/// registering private copies in \p PrivateScope. Returns true if any
/// firstprivate variable is also lastprivate (callers must then emit the
/// lastprivate final copy).
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate ||
          (Lastprivates.count(OrigVD->getCanonicalDecl()) > 0);
      // Emit each variable only once even if it appears in several clauses.
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable
            // (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

/// Emit private copies for the 'private' clause variables of \p D and register
/// them in \p PrivateScope.
void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
  return;
}

/// Emit copying of 'copyin' threadprivate variables from the master thread's
/// copies into the current thread's copies. Returns true if any copy was
/// emitted (callers then emit a barrier).
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

/// Emit initialization for 'lastprivate' clause variables of \p D: registers
/// the original variables' addresses (for the final copy-back) and, when
/// required, private copies. Returns true if any lastprivate clause exists.
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization, it is done in
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

/// Emit the final copy-back of 'lastprivate' variables into the originals,
/// guarded by \p IsLastIterCond when provided. Loop counters are updated to
/// their final values first unless \p NoFinals.
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Store \p Addr (cast to the appropriate pointer type) behind the chain of
/// pointer/reference indirections described by \p BaseTy, materializing
/// temporaries for each level; returns the outermost address to use in place
/// of \p BaseLV's pointer.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLV.getPointer()->getType();
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLV.getAlignment());
}

/// Load through the chain of pointer/reference indirections in \p BaseTy until
/// reaching \p ElTy, then return an lvalue of \p BaseLV's type at the final
/// address (bitcast to a pointer to \p ElTy's memory type).
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (auto *PtrTy = BaseTy->getAs<PointerType>())
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    else {
      BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
                                             BaseTy->castAs<ReferenceType>());
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      Address(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
          BaseLV.getAlignment()),
      BaseLV.getType(), BaseLV.getAlignmentSource());
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    for (auto IRef : C->varlists()) {
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
      auto *DRD = getReductionInit(*IRed);
      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
          Base = TempOASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE =
// (continuation) Interior of EmitOMPReductionClauseInit: array-section case,
// then the start of the array-subscript case.
                   dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        // Lower and upper bound lvalues of the section.
        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
        auto OASELValueUB =
            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue =
            loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
                        OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
          return OASELValueLB.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
                     OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit VarDecl with copy init for arrays.
              // Get the address of the original variable captured in current
              // captured region.
              // Number of elements in the section: (UB - LB) + 1.
              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
                                                 OASELValueLB.getPointer());
              Size = Builder.CreateNUWAdd(
                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
              // Bind that runtime size to the VLA size expression of the
              // private copy's variably-modified type before emitting it.
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(Size));
              EmitVariablyModifiedType(PrivateVD->getType());
              auto Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              auto *Init = PrivateVD->getInit();
              // Initialize each private element, using the user-defined
              // reduction initializer when one is declared.
              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                   DRD ? *IRed : Init,
                                   OASELValueLB.getAddress());
              EmitAutoVarCleanups(Emission);
              // Emit private VarDecl with reduction init.
              // Shift the private buffer so that indexing it with the
              // original base pointer/offsets lands on the private elements.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   OASELValueLB.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(),
                                OASELValueLB.getType(), OriginalBaseLValue,
                                Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
        // Array-subscript reduction item: privatize the single element.
        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto ASELValue = EmitLValue(ASE);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue = loadToBegin(
            *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
          return ASELValue.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
                     OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit private VarDecl with reduction init.
// (continuation) Interior of EmitOMPReductionClauseInit: rest of the
// array-subscript case, then the array-typed plain-variable case.
              AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              // Prefer the user-defined ('declare reduction') initializer
              // when present or when the private copy has no init of its own.
              if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                 ASELValue.getAddress(),
                                                 ASELValue.getType());
              } else
                EmitAutoVarInit(Emission);
              EmitAutoVarCleanups(Emission);
              // Shift the private element so that indexing through the
              // original base resolves to the private copy.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   ASELValue.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
                                OriginalBaseLValue, Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
          return Builder.CreateElementBitCast(
              GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
              "rhs.begin");
        });
      } else {
        // Plain variable in the reduction clause.
        auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        if (getContext().getAsArrayType(Type)) {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          IRef->getType(), VK_LValue, IRef->getExprLoc());
          Address OriginalAddr = EmitLValue(&DRE).getAddress();
          PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
                                          LHSVD]() -> Address {
            OriginalAddr = Builder.CreateElementBitCast(
                OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
            return OriginalAddr;
          });
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            if (Type->isVariablyModifiedType()) {
              // Bind the original array's byte size to the private copy's VLA
              // size expression before emitting the variably-modified type.
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(
                      getTypeSize(OrigVD->getType().getNonReferenceType())));
              EmitVariablyModifiedType(Type);
            }
            auto Emission = EmitAutoVarAlloca(*PrivateVD);
            auto Addr = Emission.getAllocatedAddress();
            auto *Init = PrivateVD->getInit();
            // Per-element initialization, honoring a user-defined reduction
            // initializer when one exists.
            EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                 DRD ? *IRed : Init, OriginalAddr);
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
            return Builder.CreateElementBitCast(
                GetAddrOfLocalVar(PrivateVD),
                ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
          });
        } else {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
// (continuation) Interior of EmitOMPReductionClauseInit: scalar case and the
// end of the function, then the head of EmitOMPReductionClauseFinal.
          Address OriginalAddr = Address::invalid();
          PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
                                          &OriginalAddr]() -> Address {
            DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                            CapturedStmtInfo->lookup(OrigVD) != nullptr,
                            IRef->getType(), VK_LValue, IRef->getExprLoc());
            OriginalAddr = EmitLValue(&DRE).getAddress();
            return OriginalAddr;
          });
          // Emit reduction copy.
          bool IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
                // Emit private VarDecl with reduction init.
                AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
                auto Addr = Emission.getAllocatedAddress();
                if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                  // User-defined ('declare reduction') initializer.
                  emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                   OriginalAddr,
                                                   PrivateVD->getType());
                } else
                  EmitAutoVarInit(Emission);
                EmitAutoVarCleanups(Emission);
                return Addr;
              });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
            return GetAddrOfLocalVar(PrivateVD);
          });
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
      ++IRed;
    }
  }
}

/// Gather all components of \p D's reduction clauses and emit the runtime
/// reduction that combines the threads' private copies into the original
/// variables at the end of the region. (Continues in the next block.)
void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has implicit barrier).
// (continuation) Tail of EmitOMPReductionClauseFinal: the reduction is
// "nowait" for nowait/parallel/simd directives, and simd additionally uses
// the simd-reduction flavor (last argument).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        D.getSingleClause<OMPNowaitClause>() ||
            isOpenMPParallelDirective(D.getDirectiveKind()) ||
            D.getDirectiveKind() == OMPD_simd,
        D.getDirectiveKind() == OMPD_simd);
  }
}

/// Emit the post-update expressions attached to \p D's reduction clauses.
/// \param CondGen Returns an optional guard condition; if the first
///        post-update is found and CondGen yields non-null, all post-updates
///        are emitted under that condition.
static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (auto *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Shared lowering for parallel-style directives: outline the captured
/// region via \p CodeGen, emit num_threads/proc_bind runtime calls if those
/// clauses are present, pick the 'if' clause that applies to the parallel
/// part, and emit the runtime call that forks the outlined function.
static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                           const OMPExecutableDirective &S,
                                           OpenMPDirectiveKind InnermostKind,
                                           const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
      emitParallelOrTeamsOutlinedFunction(S,
          *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  // Find the 'if' clause that applies to the parallel region (unmodified or
  // with the 'parallel' name modifier).
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPLexicalScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

/// Lower '#pragma omp parallel': the region body handles copyin,
/// firstprivate, private and reduction clauses, then emits the captured
/// statement. (Continues in the next block.)
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // propagation master's thread values of threadprivate variables to local
      // instances of that variables of all other implicit threads.
// (continuation) Tail of EmitOMPParallelDirective's region generator and of
// the directive lowering itself.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
  // Reduction post-updates run unconditionally here (null guard condition).
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

/// Emit one iteration's worth of an OpenMP loop body: counter and linear
/// variable updates, then the body statement. \p LoopExit is the 'break'
/// target; a 'continue' jumps to the per-iteration continue block.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *U : C->updates())
      EmitIgnoredExpr(U);
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

/// Emit the generic inner loop skeleton: cond block, body block (filled by
/// \p BodyGen), increment via \p IncExpr plus \p PostIncGen, and back-edge.
/// \p RequiresCleanup inserts a staging block so that exiting the loop runs
/// pending cleanups.
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

/// Emit declarations/initializers for the private copies created by 'linear'
/// clauses on \p D, plus the pre-computed linear step variables.
/// (Continues in the next block.)
void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return;
  // Emit inits for the linear variables.
// (continuation) Body of EmitOMPLinearClauseInit.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      // If the init refers to another variable, initialize from the captured
      // original; otherwise just emit the declaration with its own init.
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
}

/// Emit the final values of 'linear' variables back into the originals at
/// the end of the loop directive, followed by any clause post-updates.
/// \p CondGen may supply a guard condition (emitted lazily on the first
/// final value) so the writes happen only on the selected thread.
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      // Temporarily map the original variable to its real address so the
      // 'final' expression writes to the original storage.
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit llvm.assume-based alignment assumptions for the pointers listed in
/// 'aligned' clauses of \p D. (Continues in the next block.)
static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    // Clause alignment, if explicitly given; must be a constant.
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined default
        // alignments for SIMD instructions on the target platforms are assumed.
// (continuation) Tail of emitAlignedClause: fall back to the target's
// default SIMD alignment for the pointee type.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

/// Privatize the loop control variables of \p S: map each original counter to
/// its (uninitialized) private copy, and, when the original is addressable in
/// this function, also map the private copy back so both names resolve
/// consistently.
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    (void)LoopScope.addPrivate(VD, [&]() -> Address {
      // Emit var without initialization.
      if (!LocalDeclMap.count(PrivateVD)) {
        auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
        EmitAutoVarCleanups(VarEmission);
      }
      DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      (*I)->getType(), VK_LValue, (*I)->getExprLoc());
      return EmitLValue(&DRE).getAddress();
    });
    // Also map the private counter to the original's address when the
    // original is a local, a captured variable, or a global.
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    }
    ++I;
  }
}

/// Emit the precondition check for a loop directive: privatize the counters,
/// run the counter init expressions, then branch on \p Cond to decide whether
/// the loop executes at least once.
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (auto I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Check that loop is executed at least one time.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

/// Register privatizations for the 'linear' clauses of \p D. Variables that
/// are also SIMD loop control variables are skipped (only their private copy
/// is declared) since the loop-counter privatization already covers them.
/// (Continues in the next block.)
void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
// (continuation) Tail of EmitOMPLinearClause.
        (void)IsRegistered;
      } else
        // SIMD loop control variable: only declare the private copy.
        EmitVarDecl(*PrivateVD);
      ++CurPrivate;
    }
  }
}

/// Apply 'simdlen'/'safelen' clause effects of \p D to the loop metadata:
/// set the vectorize width and, because finite 'safelen' permits
/// loop-carried dependences within that distance, restrict or clear the
/// 'parallel' (no-dependence) marking accordingly.
static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(false);
  }
}

/// Mark the current loop for vectorization and apply simdlen/safelen.
/// \p IsMonotonic suppresses the parallel (no-dependence) marking.
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}

/// Emit the final values of the loop counters of a simd directive into the
/// original variables (only for counters addressable in this function).
/// \p CondGen may supply a guard condition, emitted lazily before the first
/// final-value store.
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      // Captured-expression counters are addressed through their init
      // expression; otherwise use the private counter's storage.
      if (CED)
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      else {
        DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      // Map the original counter to that address while evaluating the final
      // expression so the store targets the right location.
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Lower '#pragma omp simd' as an inlined region. (The region generator's
/// body continues in the next block.)
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
// (continuation) Body of EmitOMPSimdDirective's region generator, the end of
// the directive lowering, and the head of EmitOMPOuterLoop.
    OMPLoopScope PreInitScope(CGF, S);
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    CGF.EmitOMPLinearClauseInit(S);
    {
      // Privatize counters, linear, private, reduction and lastprivate
      // variables for the duration of the loop.
      OMPPrivateScope LoopScope(CGF);
      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
      CGF.EmitOMPLinearClause(S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      bool HasLastprivateClause =
          CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      CGF.EmitOMPSimdFinal(
          S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
      // Emit final copy of the lastprivate variables at the end of loops.
      if (HasLastprivateClause)
        CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
      CGF.EmitOMPReductionClauseFinal(S);
      emitPostUpdateForReductionClause(
          CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    }
    CGF.EmitOMPLinearClauseFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

/// Emit the outer dispatch loop used by worksharing schedules that need one
/// (dynamic/guided/ordered, or chunked static): each outer iteration fetches
/// the next chunk from the runtime (or advances LB/UB for static chunked)
/// and runs the inner loop over it. (Continues in the next block.)
void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond());
  } else {
    // Ask the runtime for the next chunk; it also updates IL/LB/UB/ST.
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL,
                                 LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
1671 auto ExitBlock = LoopExit.getBlock(); 1672 if (LoopScope.requiresCleanups()) 1673 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 1674 1675 auto LoopBody = createBasicBlock("omp.dispatch.body"); 1676 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 1677 if (ExitBlock != LoopExit.getBlock()) { 1678 EmitBlock(ExitBlock); 1679 EmitBranchThroughCleanup(LoopExit); 1680 } 1681 EmitBlock(LoopBody); 1682 1683 // Emit "IV = LB" (in case of static schedule, we have already calculated new 1684 // LB for loop condition and emitted it above). 1685 if (DynamicOrOrdered) 1686 EmitIgnoredExpr(S.getInit()); 1687 1688 // Create a block for the increment. 1689 auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 1690 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1691 1692 // Generate !llvm.loop.parallel metadata for loads and stores for loops 1693 // with dynamic/guided scheduling and without ordered clause. 1694 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 1695 LoopStack.setParallel(!IsMonotonic); 1696 else 1697 EmitOMPSimdInit(S, IsMonotonic); 1698 1699 SourceLocation Loc = S.getLocStart(); 1700 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), 1701 [&S, LoopExit](CodeGenFunction &CGF) { 1702 CGF.EmitOMPLoopBody(S, LoopExit); 1703 CGF.EmitStopPoint(&S); 1704 }, 1705 [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) { 1706 if (Ordered) { 1707 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd( 1708 CGF, Loc, IVSize, IVSigned); 1709 } 1710 }); 1711 1712 EmitBlock(Continue.getBlock()); 1713 BreakContinueStack.pop_back(); 1714 if (!DynamicOrOrdered) { 1715 // Emit "LB = LB + Stride", "UB = UB + Stride". 1716 EmitIgnoredExpr(S.getNextLowerBound()); 1717 EmitIgnoredExpr(S.getNextUpperBound()); 1718 } 1719 1720 EmitBranch(CondBlock); 1721 LoopStack.pop(); 1722 // Emit the fall-through block. 1723 EmitBlock(LoopExit.getBlock()); 1724 1725 // Tell the runtime we are done. 
1726 if (!DynamicOrOrdered) 1727 RT.emitForStaticFinish(*this, S.getLocEnd()); 1728 1729 } 1730 1731 void CodeGenFunction::EmitOMPForOuterLoop( 1732 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 1733 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1734 Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { 1735 auto &RT = CGM.getOpenMPRuntime(); 1736 1737 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 1738 const bool DynamicOrOrdered = 1739 Ordered || RT.isDynamic(ScheduleKind.Schedule); 1740 1741 assert((Ordered || 1742 !RT.isStaticNonchunked(ScheduleKind.Schedule, 1743 /*Chunked=*/Chunk != nullptr)) && 1744 "static non-chunked schedule does not need outer loop"); 1745 1746 // Emit outer loop. 1747 // 1748 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1749 // When schedule(dynamic,chunk_size) is specified, the iterations are 1750 // distributed to threads in the team in chunks as the threads request them. 1751 // Each thread executes a chunk of iterations, then requests another chunk, 1752 // until no chunks remain to be distributed. Each chunk contains chunk_size 1753 // iterations, except for the last chunk to be distributed, which may have 1754 // fewer iterations. When no chunk_size is specified, it defaults to 1. 1755 // 1756 // When schedule(guided,chunk_size) is specified, the iterations are assigned 1757 // to threads in the team in chunks as the executing threads request them. 1758 // Each thread executes a chunk of iterations, then requests another chunk, 1759 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 1760 // each chunk is proportional to the number of unassigned iterations divided 1761 // by the number of threads in the team, decreasing to 1. 
For a chunk_size 1762 // with value k (greater than 1), the size of each chunk is determined in the 1763 // same way, with the restriction that the chunks do not contain fewer than k 1764 // iterations (except for the last chunk to be assigned, which may have fewer 1765 // than k iterations). 1766 // 1767 // When schedule(auto) is specified, the decision regarding scheduling is 1768 // delegated to the compiler and/or runtime system. The programmer gives the 1769 // implementation the freedom to choose any possible mapping of iterations to 1770 // threads in the team. 1771 // 1772 // When schedule(runtime) is specified, the decision regarding scheduling is 1773 // deferred until run time, and the schedule and chunk size are taken from the 1774 // run-sched-var ICV. If the ICV is set to auto, the schedule is 1775 // implementation defined 1776 // 1777 // while(__kmpc_dispatch_next(&LB, &UB)) { 1778 // idx = LB; 1779 // while (idx <= UB) { BODY; ++idx; 1780 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. 1781 // } // inner loop 1782 // } 1783 // 1784 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1785 // When schedule(static, chunk_size) is specified, iterations are divided into 1786 // chunks of size chunk_size, and the chunks are assigned to the threads in 1787 // the team in a round-robin fashion in the order of the thread number. 
1788 // 1789 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { 1790 // while (idx <= UB) { BODY; ++idx; } // inner loop 1791 // LB = LB + ST; 1792 // UB = UB + ST; 1793 // } 1794 // 1795 1796 const Expr *IVExpr = S.getIterationVariable(); 1797 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1798 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1799 1800 if (DynamicOrOrdered) { 1801 llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration()); 1802 RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize, 1803 IVSigned, Ordered, UBVal, Chunk); 1804 } else { 1805 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, 1806 Ordered, IL, LB, UB, ST, Chunk); 1807 } 1808 1809 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB, 1810 ST, IL, Chunk); 1811 } 1812 1813 void CodeGenFunction::EmitOMPDistributeOuterLoop( 1814 OpenMPDistScheduleClauseKind ScheduleKind, 1815 const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, 1816 Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { 1817 1818 auto &RT = CGM.getOpenMPRuntime(); 1819 1820 // Emit outer loop. 1821 // Same behavior as a OMPForOuterLoop, except that schedule cannot be 1822 // dynamic 1823 // 1824 1825 const Expr *IVExpr = S.getIterationVariable(); 1826 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1827 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1828 1829 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, 1830 IVSize, IVSigned, /* Ordered = */ false, 1831 IL, LB, UB, ST, Chunk); 1832 1833 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, 1834 S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk); 1835 } 1836 1837 /// \brief Emit a helper variable and return corresponding lvalue. 
// Emits the declaration for the loop-helper variable referenced by \p Helper
// (e.g. the lower/upper bound, stride or is-last-iteration temporaries) and
// returns an lvalue for it.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

namespace {
// Bundles a schedule clause kind together with its two optional modifiers.
// NOTE(review): no use of this type is visible in this part of the file —
// confirm it is referenced elsewhere before removing.
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace

/// Emits code for a worksharing loop ('for' / 'for simd', also used by the
/// combined 'parallel for' forms). Chooses between the simple inlined static
/// non-chunked lowering and the general outer "dispatch" loop based on the
/// schedule clause and the presence of 'ordered'.
/// \returns true if the directive has a lastprivate clause; false also when
/// the loop precondition constant-folds to false and nothing is emitted.
bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // NOTE(review): EmittedFinals is not used anywhere below in this
    // function — candidate for removal; confirm against the full file.
    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.
    LValue LB =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    LValue UB =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          // The chunk expression is converted to the iteration variable's
          // type so the runtime sees a consistent width/signedness.
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr;
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      if (RT.isStaticNonchunked(ScheduleKind.Schedule,
                                /* Chunked */ Chunk != nullptr) &&
          !Ordered) {
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk is
        // distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
                             IVSize, IVSigned, Ordered,
                             IL.getAddress(), LB.getAddress(),
                             UB.getAddress(), ST.getAddress());
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        // A monotonic schedule suppresses the parallel-access loop metadata
        // set up by the outer-loop emission.
        const bool IsMonotonic =
            Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
            ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
            ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LB.getAddress(), UB.getAddress(), ST.getAddress(),
                            IL.getAddress(), Chunk);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S,
                         [&](CodeGenFunction &CGF) -> llvm::Value * {
                           return CGF.Builder.CreateIsNotNull(
                               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
                         });
      }
      EmitOMPReductionClauseFinal(S);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getLocStart()));
    });
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
  return HasLastprivateClause;
}

/// Emit '#pragma omp for' as an inlined worksharing loop, followed by an
/// implicit barrier unless a 'nowait' clause allows skipping it (the barrier
/// is still required when lastprivates must be made visible).
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

/// Emit '#pragma omp for simd': same worksharing-loop lowering as 'for', but
/// the inlined region is marked as a simd directive; the trailing barrier is
/// still emitted as a for-barrier.
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

// Creates a 32-bit helper temporary (lb/ub/stride/is-last/iv for the
// sections lowering), optionally storing \p Init into it, and returns its
// lvalue.
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitScalarInit(Init, LVal);
  return LVal;
}

/// Emit a 'sections' region (used by both '#pragma omp sections' and the
/// combined '#pragma omp parallel sections'): the sections are lowered as a
/// statically-scheduled loop over section indices whose body is a switch that
/// dispatches to the matching section statement.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  // CS is null when the associated statement is a single (non-compound)
  // statement; in that case there is exactly one implicit section.
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Global upper bound is <number of sections> - 1 (or 0 for the single
    // implicit section).
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(),
                        /*fpContractable=*/false);
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32,
        /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
        UB.getAddress(), ST.getAddress());
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart());
    CGF.EmitOMPReductionClauseFinal(S);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}

/// Emit '#pragma omp sections' followed by its implicit barrier (unless
/// 'nowait' is present).
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_sections);
  }
}

/// Emit a single '#pragma omp section' as an inlined region around its
/// associated statement.
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
                                              S.hasCancel());
}

/// Emit '#pragma omp single', including collection of 'copyprivate' helper
/// expressions (handled by the runtime's single-region emission) and the
/// trailing implicit barrier when neither 'nowait' nor 'copyprivate' removes
/// the need for it.
void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination>=<source> expressions)
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for 'single' region along with 'copyprivate' clauses
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid data race on firstprivate
  // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
  // NOTE(review): inside this branch the nowait clause is known to be absent,
  // so the ternary below always selects OMPD_single — the OMPD_unknown arm
  // looks dead; confirm before simplifying.
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getLocStart(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
}

/// Emit '#pragma omp master' as a runtime-guarded master region around the
/// associated statement.
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
}

/// Emit '#pragma omp critical', forwarding the critical-section name and the
/// optional 'hint' clause expression to the runtime.
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  Expr *Hint = nullptr;
  if (auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getLocStart(), Hint);
}

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPWorksharingLoop(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPWorksharingLoop(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitSections(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
}

/// Shared lowering for task-based directives ('task', taskloops): collects
/// final/priority state and private/firstprivate/lastprivate/depend clause
/// data into \p Data, emits the outlined task function whose body remaps the
/// privates passed in by the runtime and then runs \p BodyGen, and finally
/// invokes \p TaskGen to emit the actual task-spawning runtime call.
void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
                                                const RegionCodeGenTy &BodyGen,
                                                const TaskGenTy &TaskGen,
                                                OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  // Captured-decl params: [0] thread id, [1] part id, ..., [4] task_t
  // (PartId/TaskT are derived below via std::next).
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  auto *TaskT = std::next(I, 4);
  // Check if the task is final
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    // Runtime currently does not support codegen for priority clause argument.
    // TODO: Add codegen for priority clause arg when runtime lib support it.
    auto *Prio = Clause->getPriority();
    // NOTE(review): Prio is the priority *expression*; storing it via
    // setInt() looks suspicious — confirm the intended PointerIntPair usage.
    Data.Priority.setInt(Prio);
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Deduplicate: a variable may appear in several clauses.
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (auto *IRef : C->varlists())
      Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
  auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen, &LastprivateDstsOrigs](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      // Param 3 of the captured decl is the privates copy function, param 2
      // the pointer to the runtime-allocated privates block.
      auto *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (auto *E : Data.PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.LastprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      // Ask the runtime-generated copy function to fill in the addresses of
      // the task's private copies.
      CGF.EmitRuntimeCall(CopyFn, CallArgs);
      for (auto &&Pair : LastprivateDstsOrigs) {
        auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    (void)Scope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}

/// Emit '#pragma omp task': gathers the 'if' condition and tied/untied state,
/// then delegates clause processing and outlining to
/// EmitOMPTaskBasedDirective with a TaskGen that emits the runtime task call.
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}

/// Emit '#pragma omp taskyield' as a direct runtime call.
void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

/// Emit '#pragma omp barrier' as a direct runtime call.
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

/// Emit '#pragma omp taskwait' as a direct runtime call.
void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
}

/// Emit '#pragma omp taskgroup': the associated statement runs inside a
/// runtime-managed taskgroup region.
void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
}

/// Emit '#pragma omp flush' with the (possibly empty) list taken from the
/// optional flush clause.
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

/// Emit the loop of '#pragma omp distribute': precondition check, helper
/// variables, schedule detection and either a statically chunked inner loop
/// or a dynamically scheduled outer loop.
void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits (lower/upper bound, stride, is-last flag).
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          // The chunk expression may have a type different from the IV.
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                                    IVSize, IVSigned, /* Ordered = */ false,
                                    IL.getAddress(), LB.getAddress(),
                                    UB.getAddress(), ST.getAddress());
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope,
                                   LB.getAddress(), UB.getAddress(),
                                   ST.getAddress(), IL.getAddress(), Chunk);
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}

/// Emit '#pragma omp distribute' as an inlined region around the distribute
/// loop.
void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S);
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
                                              false);
}

/// Outline the captured statement of an 'ordered' region into a separate
/// function, marked NoInline so the region stays a distinct call.
static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
  Fn->addFnAttr(llvm::Attribute::NoInline);
  return Fn;
}

/// Emit '#pragma omp ordered'. With a 'simd' clause the body becomes a direct
/// call to an outlined function; otherwise it is wrapped in a runtime
/// ordered region.
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (!S.getAssociatedStmt())
    return;
  auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    if (C) {
      auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
      CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
}

/// Convert a scalar or complex RValue to a scalar of the destination type.
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
                                        Loc)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType, Loc);
}

/// Convert a scalar or complex RValue to a complex value of the destination
/// type; a scalar becomes the real part with a zero imaginary part.
static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                              DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

/// Store RVal into LVal atomically, except global-register lvalues which
/// cannot be the target of an atomic store.
static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  } else {
    CGF.EmitAtomicStore(RVal, LVal,
                        IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                 : llvm::AtomicOrdering::Monotonic,
                        LVal.isVolatile(), /*IsInit=*/false);
  }
}

/// Store RVal (of type RValTy) into LVal, converting according to LVal's
/// evaluation kind. Aggregates are not supported here.
void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

/// Emit 'v = x' for '#pragma omp atomic read'.
static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  // Global registers cannot be loaded atomically; fall back to a plain load.
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(
                         XLValue, Loc,
                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                  : llvm::AtomicOrdering::Monotonic,
                         XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
}

/// Emit 'x = expr' for '#pragma omp atomic write'.
static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

/// Try to lower an atomic update of 'X' to a single 'atomicrmw' instruction.
/// Returns {true, old value of X} on success, or {false, null} when the
/// caller must fall back to a compare-and-swap loop.
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for
  // 'x' expression is simple and atomic is allowed for the given type for the
  // target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  // Map the AST binary operator onto the corresponding atomicrmw operation.
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    // 'x = expr - x' is not a plain subtraction of 'x'; no atomicrmw form.
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  // For the relational forms, which side 'x' appears on and the signedness
  // of its type select between the min/max (signed/unsigned) operations.
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  // These have no atomicrmw equivalent; fall back to compare-and-swap.
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  // These operator kinds never reach atomic-update codegen.
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    // Constant operands may need to be resized to the memory type of 'x'.
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

/// Emit an atomic update of 'X' using operation 'BO' with operand 'E'.
/// Attempts a single atomicrmw first; otherwise falls back to a
/// compare-and-swap loop (or a plain load/store for global registers).
/// Returns whether atomicrmw was used and, if so, the old value of 'X'.
std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

/// Emit '#pragma omp atomic update' ('x binop= expr' and related forms).
static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  // The update expression's operands are opaque values; decide which stands
  // for 'x' and which for 'expr' based on the side 'x' appears on.
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  // Generator for the CAS fallback: bind the opaque operands and re-evaluate
  // the update expression with the current value of 'x'.
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

/// Convert an RValue to the given result type according to its evaluation
/// kind (scalar or complex; aggregates are unreachable here).
static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

/// Emit '#pragma omp atomic capture': update or write 'x' and store its old
/// or new value (selected by IsPostfixUpdate) into 'v'.
static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; - > expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    // CAS generator: besides computing the update, record the value to store
    // into 'v' (old 'x' for postfix captures, the new value otherwise).
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

/// Dispatch '#pragma omp atomic' codegen on the clause kind. Clauses that
/// cannot appear on 'atomic' are unreachable.
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    // 'atomic' without a kind clause defaults to 'update'.
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

/// Emit '#pragma omp atomic' as an inlined region.
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find first clause (skip seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
    enterFullExpression(EWC);
  }
  // Processing for statements under 'atomic capture'.
3127 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 3128 for (const auto *C : Compound->body()) { 3129 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) { 3130 enterFullExpression(EWC); 3131 } 3132 } 3133 } 3134 3135 auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, 3136 PrePostActionTy &) { 3137 CGF.EmitStopPoint(CS); 3138 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), 3139 S.getV(), S.getExpr(), S.getUpdateExpr(), 3140 S.isXLHSInRHSPart(), S.getLocStart()); 3141 }; 3142 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3143 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); 3144 } 3145 3146 std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/> 3147 CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction( 3148 CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName, 3149 bool IsOffloadEntry) { 3150 llvm::Function *OutlinedFn = nullptr; 3151 llvm::Constant *OutlinedFnID = nullptr; 3152 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3153 OMPPrivateScope PrivateScope(CGF); 3154 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3155 CGF.EmitOMPPrivateClause(S, PrivateScope); 3156 (void)PrivateScope.Privatize(); 3157 3158 Action.Enter(CGF); 3159 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3160 }; 3161 // Emit target region as a standalone region. 
3162 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3163 S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); 3164 return std::make_pair(OutlinedFn, OutlinedFnID); 3165 } 3166 3167 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 3168 const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); 3169 3170 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3171 GenerateOpenMPCapturedVars(CS, CapturedVars); 3172 3173 llvm::Function *Fn = nullptr; 3174 llvm::Constant *FnID = nullptr; 3175 3176 // Check if we have any if clause associated with the directive. 3177 const Expr *IfCond = nullptr; 3178 3179 if (auto *C = S.getSingleClause<OMPIfClause>()) { 3180 IfCond = C->getCondition(); 3181 } 3182 3183 // Check if we have any device clause associated with the directive. 3184 const Expr *Device = nullptr; 3185 if (auto *C = S.getSingleClause<OMPDeviceClause>()) { 3186 Device = C->getDevice(); 3187 } 3188 3189 // Check if we have an if clause whose conditional always evaluates to false 3190 // or if we do not have any targets specified. If so the target region is not 3191 // an offload entry point. 3192 bool IsOffloadEntry = true; 3193 if (IfCond) { 3194 bool Val; 3195 if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) 3196 IsOffloadEntry = false; 3197 } 3198 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3199 IsOffloadEntry = false; 3200 3201 assert(CurFuncDecl && "No parent declaration for target region!"); 3202 StringRef ParentName; 3203 // In case we have Ctors/Dtors we use the complete type variant to produce 3204 // the mangling of the device outlined kernel. 
3205 if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl)) 3206 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); 3207 else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl)) 3208 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); 3209 else 3210 ParentName = 3211 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl))); 3212 3213 std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction( 3214 CGM, S, ParentName, IsOffloadEntry); 3215 OMPLexicalScope Scope(*this, S); 3216 CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device, 3217 CapturedVars); 3218 } 3219 3220 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, 3221 const OMPExecutableDirective &S, 3222 OpenMPDirectiveKind InnermostKind, 3223 const RegionCodeGenTy &CodeGen) { 3224 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 3225 auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). 3226 emitParallelOrTeamsOutlinedFunction(S, 3227 *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 3228 3229 const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S); 3230 const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>(); 3231 const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>(); 3232 if (NT || TL) { 3233 Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; 3234 Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; 3235 3236 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, 3237 S.getLocStart()); 3238 } 3239 3240 OMPLexicalScope Scope(CGF, S); 3241 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3242 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3243 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, 3244 CapturedVars); 3245 } 3246 3247 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { 3248 // Emit parallel region as a standalone region. 
3249 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3250 OMPPrivateScope PrivateScope(CGF); 3251 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3252 CGF.EmitOMPPrivateClause(S, PrivateScope); 3253 (void)PrivateScope.Privatize(); 3254 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3255 }; 3256 emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); 3257 } 3258 3259 void CodeGenFunction::EmitOMPCancellationPointDirective( 3260 const OMPCancellationPointDirective &S) { 3261 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), 3262 S.getCancelRegion()); 3263 } 3264 3265 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 3266 const Expr *IfCond = nullptr; 3267 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3268 if (C->getNameModifier() == OMPD_unknown || 3269 C->getNameModifier() == OMPD_cancel) { 3270 IfCond = C->getCondition(); 3271 break; 3272 } 3273 } 3274 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, 3275 S.getCancelRegion()); 3276 } 3277 3278 CodeGenFunction::JumpDest 3279 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 3280 if (Kind == OMPD_parallel || Kind == OMPD_task) 3281 return ReturnBlock; 3282 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 3283 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for); 3284 return BreakContinueStack.back().BreakBlock; 3285 } 3286 3287 // Generate the instructions for '#pragma omp target data' directive. 3288 void CodeGenFunction::EmitOMPTargetDataDirective( 3289 const OMPTargetDataDirective &S) { 3290 // The target data enclosed region is implemented just by emitting the 3291 // statement. 
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    // No devices: emit the body inline; AsInlined privatizes captured vars so
    // the enclosed statement still sees the right addresses.
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);

    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_data,
                                                CodeGen);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Runtime emits the map-enter/map-exit bracketing around CodeGen.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, CodeGen);
}

// Emit '#pragma omp target enter data': a standalone mapping call with no
// associated region, guarded by optional 'if' and 'device' clauses.
void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetEnterOrExitDataCall(*this, S, IfCond,
                                                       Device);
}

// Emit '#pragma omp target exit data': mirrors 'target enter data' above;
// the shared runtime entry point distinguishes enter vs. exit from S.
void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  CGM.getOpenMPRuntime().emitTargetEnterOrExitDataCall(*this, S, IfCond,
                                                       Device);
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  // TODO: codegen for target parallel.
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  // TODO: codegen for target parallel for.
}

/// Register a private mapping in \p Privates from the helper variable named
/// by \p Helper to the address of the implicit parameter \p PVD.
/// (Despite the old comment, this returns nothing; the lvalue is produced
/// lazily by the registered callback when the scope is privatized.)
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(
      VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
}

// Shared emission for taskloop-based directives ('taskloop',
// 'taskloop simd'): builds the loop body and the task-launch callbacks, then
// defers to the common task-based emission path.
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Only an unmodified 'if' clause or one with the 'taskloop' name modifier
  // guards this directive.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop: the pointer holds the evaluated clause
  // expression; the int flag distinguishes num_tasks (true) from grainsize
  // (false). The two clauses are mutually exclusive by OpenMP rules.
  if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  // Emits the loop body that will run inside each generated task.
  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // 'taskloop simd' also needs the simd loop metadata/init.
    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits. The captured decl's parameters at indices 5..8
    // carry the per-task lower bound, upper bound, stride and last-iteration
    // flag; map each loop helper variable onto its parameter.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  // Wraps the actual __kmpc taskloop runtime call in an inlined 'taskloop'
  // region, re-emitting any loop pre-inits first.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}