1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit OpenMP nodes as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCleanup.h" 15 #include "CGOpenMPRuntime.h" 16 #include "CodeGenFunction.h" 17 #include "CodeGenModule.h" 18 #include "TargetInfo.h" 19 #include "clang/AST/Stmt.h" 20 #include "clang/AST/StmtOpenMP.h" 21 #include "clang/AST/DeclOpenMP.h" 22 #include "llvm/IR/CallSite.h" 23 using namespace clang; 24 using namespace CodeGen; 25 26 namespace { 27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen 28 /// for captured expressions. 29 class OMPLexicalScope : public CodeGenFunction::LexicalScope { 30 void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 31 for (const auto *C : S.clauses()) { 32 if (auto *CPI = OMPClauseWithPreInit::get(C)) { 33 if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { 34 for (const auto *I : PreInit->decls()) { 35 if (!I->hasAttr<OMPCaptureNoInitAttr>()) 36 CGF.EmitVarDecl(cast<VarDecl>(*I)); 37 else { 38 CodeGenFunction::AutoVarEmission Emission = 39 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 40 CGF.EmitAutoVarCleanups(Emission); 41 } 42 } 43 } 44 } 45 } 46 } 47 48 public: 49 OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) 50 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()) { 51 emitPreInitStmt(CGF, S); 52 } 53 }; 54 55 /// Private scope for OpenMP loop-based directives, that supports capturing 56 /// of used expression from loop statement. 57 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { 58 void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { 59 if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) { 60 if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) { 61 for (const auto *I : PreInits->decls()) 62 CGF.EmitVarDecl(cast<VarDecl>(*I)); 63 } 64 } 65 } 66 67 public: 68 OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S) 69 : CodeGenFunction::RunCleanupsScope(CGF) { 70 emitPreInitStmt(CGF, S); 71 } 72 }; 73 74 } // namespace 75 76 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { 77 auto &C = getContext(); 78 llvm::Value *Size = nullptr; 79 auto SizeInChars = C.getTypeSizeInChars(Ty); 80 if (SizeInChars.isZero()) { 81 // getTypeSizeInChars() returns 0 for a VLA. 82 while (auto *VAT = C.getAsVariableArrayType(Ty)) { 83 llvm::Value *ArraySize; 84 std::tie(ArraySize, Ty) = getVLASize(VAT); 85 Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize; 86 } 87 SizeInChars = C.getTypeSizeInChars(Ty); 88 if (SizeInChars.isZero()) 89 return llvm::ConstantInt::get(SizeTy, /*V=*/0); 90 Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); 91 } else 92 Size = CGM.getSize(SizeInChars); 93 return Size; 94 } 95 96 void CodeGenFunction::GenerateOpenMPCapturedVars( 97 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { 98 const RecordDecl *RD = S.getCapturedRecordDecl(); 99 auto CurField = RD->field_begin(); 100 auto CurCap = S.captures().begin(); 101 for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), 102 E = S.capture_init_end(); 103 I != E; ++I, ++CurField, ++CurCap) { 104 if (CurField->hasCapturedVLAType()) { 105 auto VAT = CurField->getCapturedVLAType(); 106 auto *Val = VLASizeMap[VAT->getSizeExpr()]; 107 CapturedVars.push_back(Val); 108 } else if (CurCap->capturesThis()) 109 CapturedVars.push_back(CXXThisValue); 110 else if (CurCap->capturesVariableByCopy()) 111 CapturedVars.push_back( 112 EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal()); 113 else { 114 assert(CurCap->capturesVariable() && "Expected capture by reference."); 115 CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer()); 116 } 117 } 118 } 119 120 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType, 121 StringRef Name, LValue AddrLV, 122 bool isReferenceType = false) { 123 ASTContext &Ctx = CGF.getContext(); 124 125 auto *CastedPtr = CGF.EmitScalarConversion( 126 AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(), 127 Ctx.getPointerType(DstType), SourceLocation()); 128 auto TmpAddr = 129 CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) 130 .getAddress(); 131 132 // If we are dealing with references we need to return the address of the 133 // reference instead of the reference of the value. 134 if (isReferenceType) { 135 QualType RefType = Ctx.getLValueReferenceType(DstType); 136 auto *RefVal = TmpAddr.getPointer(); 137 TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref"); 138 auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType); 139 CGF.EmitScalarInit(RefVal, TmpLVal); 140 } 141 142 return TmpAddr; 143 } 144 145 llvm::Function * 146 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { 147 assert( 148 CapturedStmtInfo && 149 "CapturedStmtInfo should be set when generating the captured function"); 150 const CapturedDecl *CD = S.getCapturedDecl(); 151 const RecordDecl *RD = S.getCapturedRecordDecl(); 152 assert(CD->hasBody() && "missing CapturedDecl body"); 153 154 // Build the argument list. 155 ASTContext &Ctx = CGM.getContext(); 156 FunctionArgList Args; 157 Args.append(CD->param_begin(), 158 std::next(CD->param_begin(), CD->getContextParamPosition())); 159 auto I = S.captures().begin(); 160 for (auto *FD : RD->fields()) { 161 QualType ArgType = FD->getType(); 162 IdentifierInfo *II = nullptr; 163 VarDecl *CapVar = nullptr; 164 165 // If this is a capture by copy and the type is not a pointer, the outlined 166 // function argument type should be uintptr and the value properly casted to 167 // uintptr. This is necessary given that the runtime library is only able to 168 // deal with pointers. We can pass in the same way the VLA type sizes to the 169 // outlined function. 170 if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || 171 I->capturesVariableArrayType()) 172 ArgType = Ctx.getUIntPtrType(); 173 174 if (I->capturesVariable() || I->capturesVariableByCopy()) { 175 CapVar = I->getCapturedVar(); 176 II = CapVar->getIdentifier(); 177 } else if (I->capturesThis()) 178 II = &getContext().Idents.get("this"); 179 else { 180 assert(I->capturesVariableArrayType()); 181 II = &getContext().Idents.get("vla"); 182 } 183 if (ArgType->isVariablyModifiedType()) 184 ArgType = getContext().getVariableArrayDecayedType(ArgType); 185 Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr, 186 FD->getLocation(), II, ArgType)); 187 ++I; 188 } 189 Args.append( 190 std::next(CD->param_begin(), CD->getContextParamPosition() + 1), 191 CD->param_end()); 192 193 // Create the function declaration. 194 FunctionType::ExtInfo ExtInfo; 195 const CGFunctionInfo &FuncInfo = 196 CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); 197 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); 198 199 llvm::Function *F = llvm::Function::Create( 200 FuncLLVMTy, llvm::GlobalValue::InternalLinkage, 201 CapturedStmtInfo->getHelperName(), &CGM.getModule()); 202 CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); 203 if (CD->isNothrow()) 204 F->addFnAttr(llvm::Attribute::NoUnwind); 205 206 // Generate the function. 207 StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), 208 CD->getBody()->getLocStart()); 209 unsigned Cnt = CD->getContextParamPosition(); 210 I = S.captures().begin(); 211 for (auto *FD : RD->fields()) { 212 // If we are capturing a pointer by copy we don't need to do anything, just 213 // use the value that we get from the arguments. 214 if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { 215 setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt])); 216 ++Cnt; 217 ++I; 218 continue; 219 } 220 221 LValue ArgLVal = 222 MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(), 223 AlignmentSource::Decl); 224 if (FD->hasCapturedVLAType()) { 225 LValue CastedArgLVal = 226 MakeAddrLValue(castValueFromUintptr(*this, FD->getType(), 227 Args[Cnt]->getName(), ArgLVal), 228 FD->getType(), AlignmentSource::Decl); 229 auto *ExprArg = 230 EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal(); 231 auto VAT = FD->getCapturedVLAType(); 232 VLASizeMap[VAT->getSizeExpr()] = ExprArg; 233 } else if (I->capturesVariable()) { 234 auto *Var = I->getCapturedVar(); 235 QualType VarTy = Var->getType(); 236 Address ArgAddr = ArgLVal.getAddress(); 237 if (!VarTy->isReferenceType()) { 238 ArgAddr = EmitLoadOfReference( 239 ArgAddr, ArgLVal.getType()->castAs<ReferenceType>()); 240 } 241 setAddrOfLocalVar( 242 Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var))); 243 } else if (I->capturesVariableByCopy()) { 244 assert(!FD->getType()->isAnyPointerType() && 245 "Not expecting a captured pointer."); 246 auto *Var = I->getCapturedVar(); 247 QualType VarTy = Var->getType(); 248 setAddrOfLocalVar(I->getCapturedVar(), 249 castValueFromUintptr(*this, FD->getType(), 250 Args[Cnt]->getName(), ArgLVal, 251 VarTy->isReferenceType())); 252 } else { 253 // If 'this' is captured, load it into CXXThisValue. 254 assert(I->capturesThis()); 255 CXXThisValue = 256 EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal(); 257 } 258 ++Cnt; 259 ++I; 260 } 261 262 PGO.assignRegionCounters(GlobalDecl(CD), F); 263 CapturedStmtInfo->EmitBody(*this, CD->getBody()); 264 FinishFunction(CD->getBodyRBrace()); 265 266 return F; 267 } 268 269 //===----------------------------------------------------------------------===// 270 // OpenMP Directive Emission 271 //===----------------------------------------------------------------------===// 272 void CodeGenFunction::EmitOMPAggregateAssign( 273 Address DestAddr, Address SrcAddr, QualType OriginalType, 274 const llvm::function_ref<void(Address, Address)> &CopyGen) { 275 // Perform element-by-element initialization. 276 QualType ElementTy; 277 278 // Drill down to the base element type on both arrays. 279 auto ArrayTy = OriginalType->getAsArrayTypeUnsafe(); 280 auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr); 281 SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 282 283 auto SrcBegin = SrcAddr.getPointer(); 284 auto DestBegin = DestAddr.getPointer(); 285 // Cast from pointer to array type to pointer to single element. 286 auto DestEnd = Builder.CreateGEP(DestBegin, NumElements); 287 // The basic structure here is a while-do loop. 288 auto BodyBB = createBasicBlock("omp.arraycpy.body"); 289 auto DoneBB = createBasicBlock("omp.arraycpy.done"); 290 auto IsEmpty = 291 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); 292 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 293 294 // Enter the loop body, making that address the current address. 295 auto EntryBB = Builder.GetInsertBlock(); 296 EmitBlock(BodyBB); 297 298 CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy); 299 300 llvm::PHINode *SrcElementPHI = 301 Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 302 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 303 Address SrcElementCurrent = 304 Address(SrcElementPHI, 305 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 306 307 llvm::PHINode *DestElementPHI = 308 Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 309 DestElementPHI->addIncoming(DestBegin, EntryBB); 310 Address DestElementCurrent = 311 Address(DestElementPHI, 312 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 313 314 // Emit copy. 315 CopyGen(DestElementCurrent, SrcElementCurrent); 316 317 // Shift the address forward by one element. 318 auto DestElementNext = Builder.CreateConstGEP1_32( 319 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 320 auto SrcElementNext = Builder.CreateConstGEP1_32( 321 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 322 // Check whether we've reached the end. 323 auto Done = 324 Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 325 Builder.CreateCondBr(Done, DoneBB, BodyBB); 326 DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock()); 327 SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock()); 328 329 // Done. 330 EmitBlock(DoneBB, /*IsFinished=*/true); 331 } 332 333 /// Check if the combiner is a call to UDR combiner and if it is so return the 334 /// UDR decl used for reduction. 335 static const OMPDeclareReductionDecl * 336 getReductionInit(const Expr *ReductionOp) { 337 if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) 338 if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 339 if (auto *DRE = 340 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 341 if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 342 return DRD; 343 return nullptr; 344 } 345 346 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 347 const OMPDeclareReductionDecl *DRD, 348 const Expr *InitOp, 349 Address Private, Address Original, 350 QualType Ty) { 351 if (DRD->getInitializer()) { 352 std::pair<llvm::Function *, llvm::Function *> Reduction = 353 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 354 auto *CE = cast<CallExpr>(InitOp); 355 auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 356 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 357 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 358 auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 359 auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 360 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 361 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 362 [=]() -> Address { return Private; }); 363 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 364 [=]() -> Address { return Original; }); 365 (void)PrivateScope.Privatize(); 366 RValue Func = RValue::get(Reduction.second); 367 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 368 CGF.EmitIgnoredExpr(InitOp); 369 } else { 370 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 371 auto *GV = new llvm::GlobalVariable( 372 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 373 llvm::GlobalValue::PrivateLinkage, Init, ".init"); 374 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 375 RValue InitRVal; 376 switch (CGF.getEvaluationKind(Ty)) { 377 case TEK_Scalar: 378 InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation()); 379 break; 380 case TEK_Complex: 381 InitRVal = 382 RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation())); 383 break; 384 case TEK_Aggregate: 385 InitRVal = RValue::getAggregate(LV.getAddress()); 386 break; 387 } 388 OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue); 389 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 390 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 391 /*IsInitializer=*/false); 392 } 393 } 394 395 /// \brief Emit initialization of arrays of complex types. 396 /// \param DestAddr Address of the array. 397 /// \param Type Type of array. 398 /// \param Init Initial expression of array. 399 /// \param SrcAddr Address of the original array. 400 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 401 QualType Type, const Expr *Init, 402 Address SrcAddr = Address::invalid()) { 403 auto *DRD = getReductionInit(Init); 404 // Perform element-by-element initialization. 405 QualType ElementTy; 406 407 // Drill down to the base element type on both arrays. 408 auto ArrayTy = Type->getAsArrayTypeUnsafe(); 409 auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 410 DestAddr = 411 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 412 if (DRD) 413 SrcAddr = 414 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 415 416 llvm::Value *SrcBegin = nullptr; 417 if (DRD) 418 SrcBegin = SrcAddr.getPointer(); 419 auto DestBegin = DestAddr.getPointer(); 420 // Cast from pointer to array type to pointer to single element. 421 auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 422 // The basic structure here is a while-do loop. 423 auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 424 auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 425 auto IsEmpty = 426 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 427 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 428 429 // Enter the loop body, making that address the current address. 430 auto EntryBB = CGF.Builder.GetInsertBlock(); 431 CGF.EmitBlock(BodyBB); 432 433 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 434 435 llvm::PHINode *SrcElementPHI = nullptr; 436 Address SrcElementCurrent = Address::invalid(); 437 if (DRD) { 438 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 439 "omp.arraycpy.srcElementPast"); 440 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 441 SrcElementCurrent = 442 Address(SrcElementPHI, 443 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 444 } 445 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 446 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 447 DestElementPHI->addIncoming(DestBegin, EntryBB); 448 Address DestElementCurrent = 449 Address(DestElementPHI, 450 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 451 452 // Emit copy. 453 { 454 CodeGenFunction::RunCleanupsScope InitScope(CGF); 455 if (DRD) { 456 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 457 SrcElementCurrent, ElementTy); 458 } else 459 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 460 /*IsInitializer=*/false); 461 } 462 463 if (DRD) { 464 // Shift the address forward by one element. 465 auto SrcElementNext = CGF.Builder.CreateConstGEP1_32( 466 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 467 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 468 } 469 470 // Shift the address forward by one element. 471 auto DestElementNext = CGF.Builder.CreateConstGEP1_32( 472 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 473 // Check whether we've reached the end. 474 auto Done = 475 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 476 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 477 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 478 479 // Done. 480 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 481 } 482 483 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, 484 Address SrcAddr, const VarDecl *DestVD, 485 const VarDecl *SrcVD, const Expr *Copy) { 486 if (OriginalType->isArrayType()) { 487 auto *BO = dyn_cast<BinaryOperator>(Copy); 488 if (BO && BO->getOpcode() == BO_Assign) { 489 // Perform simple memcpy for simple copying. 490 EmitAggregateAssign(DestAddr, SrcAddr, OriginalType); 491 } else { 492 // For arrays with complex element types perform element by element 493 // copying. 494 EmitOMPAggregateAssign( 495 DestAddr, SrcAddr, OriginalType, 496 [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) { 497 // Working with the single array element, so have to remap 498 // destination and source variables to corresponding array 499 // elements. 500 CodeGenFunction::OMPPrivateScope Remap(*this); 501 Remap.addPrivate(DestVD, [DestElement]() -> Address { 502 return DestElement; 503 }); 504 Remap.addPrivate( 505 SrcVD, [SrcElement]() -> Address { return SrcElement; }); 506 (void)Remap.Privatize(); 507 EmitIgnoredExpr(Copy); 508 }); 509 } 510 } else { 511 // Remap pseudo source variable to private copy. 512 CodeGenFunction::OMPPrivateScope Remap(*this); 513 Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; }); 514 Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; }); 515 (void)Remap.Privatize(); 516 // Emit copying of the whole variable. 517 EmitIgnoredExpr(Copy); 518 } 519 } 520 521 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, 522 OMPPrivateScope &PrivateScope) { 523 if (!HaveInsertPoint()) 524 return false; 525 bool FirstprivateIsLastprivate = false; 526 llvm::DenseSet<const VarDecl *> Lastprivates; 527 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 528 for (const auto *D : C->varlists()) 529 Lastprivates.insert( 530 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); 531 } 532 llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; 533 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { 534 auto IRef = C->varlist_begin(); 535 auto InitsRef = C->inits().begin(); 536 for (auto IInit : C->private_copies()) { 537 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 538 FirstprivateIsLastprivate = 539 FirstprivateIsLastprivate || 540 (Lastprivates.count(OrigVD->getCanonicalDecl()) > 0); 541 if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { 542 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 543 auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); 544 bool IsRegistered; 545 DeclRefExpr DRE( 546 const_cast<VarDecl *>(OrigVD), 547 /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( 548 OrigVD) != nullptr, 549 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 550 Address OriginalAddr = EmitLValue(&DRE).getAddress(); 551 QualType Type = OrigVD->getType(); 552 if (Type->isArrayType()) { 553 // Emit VarDecl with copy init for arrays. 554 // Get the address of the original variable captured in current 555 // captured region. 556 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 557 auto Emission = EmitAutoVarAlloca(*VD); 558 auto *Init = VD->getInit(); 559 if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) { 560 // Perform simple memcpy. 561 EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr, 562 Type); 563 } else { 564 EmitOMPAggregateAssign( 565 Emission.getAllocatedAddress(), OriginalAddr, Type, 566 [this, VDInit, Init](Address DestElement, 567 Address SrcElement) { 568 // Clean up any temporaries needed by the initialization. 569 RunCleanupsScope InitScope(*this); 570 // Emit initialization for single element. 571 setAddrOfLocalVar(VDInit, SrcElement); 572 EmitAnyExprToMem(Init, DestElement, 573 Init->getType().getQualifiers(), 574 /*IsInitializer*/ false); 575 LocalDeclMap.erase(VDInit); 576 }); 577 } 578 EmitAutoVarCleanups(Emission); 579 return Emission.getAllocatedAddress(); 580 }); 581 } else { 582 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 583 // Emit private VarDecl with copy init. 584 // Remap temp VDInit variable to the address of the original 585 // variable 586 // (for proper handling of captured global variables). 587 setAddrOfLocalVar(VDInit, OriginalAddr); 588 EmitDecl(*VD); 589 LocalDeclMap.erase(VDInit); 590 return GetAddrOfLocalVar(VD); 591 }); 592 } 593 assert(IsRegistered && 594 "firstprivate var already registered as private"); 595 // Silence the warning about unused variable. 596 (void)IsRegistered; 597 } 598 ++IRef; 599 ++InitsRef; 600 } 601 } 602 return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty(); 603 } 604 605 void CodeGenFunction::EmitOMPPrivateClause( 606 const OMPExecutableDirective &D, 607 CodeGenFunction::OMPPrivateScope &PrivateScope) { 608 if (!HaveInsertPoint()) 609 return; 610 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 611 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) { 612 auto IRef = C->varlist_begin(); 613 for (auto IInit : C->private_copies()) { 614 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 615 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 616 auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 617 bool IsRegistered = 618 PrivateScope.addPrivate(OrigVD, [&]() -> Address { 619 // Emit private VarDecl with copy init. 620 EmitDecl(*VD); 621 return GetAddrOfLocalVar(VD); 622 }); 623 assert(IsRegistered && "private var already registered as private"); 624 // Silence the warning about unused variable. 625 (void)IsRegistered; 626 } 627 ++IRef; 628 } 629 } 630 } 631 632 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { 633 if (!HaveInsertPoint()) 634 return false; 635 // threadprivate_var1 = master_threadprivate_var1; 636 // operator=(threadprivate_var2, master_threadprivate_var2); 637 // ... 638 // __kmpc_barrier(&loc, global_tid); 639 llvm::DenseSet<const VarDecl *> CopiedVars; 640 llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; 641 for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) { 642 auto IRef = C->varlist_begin(); 643 auto ISrcRef = C->source_exprs().begin(); 644 auto IDestRef = C->destination_exprs().begin(); 645 for (auto *AssignOp : C->assignment_ops()) { 646 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 647 QualType Type = VD->getType(); 648 if (CopiedVars.insert(VD->getCanonicalDecl()).second) { 649 // Get the address of the master variable. If we are emitting code with 650 // TLS support, the address is passed from the master as field in the 651 // captured declaration. 652 Address MasterAddr = Address::invalid(); 653 if (getLangOpts().OpenMPUseTLS && 654 getContext().getTargetInfo().isTLSSupported()) { 655 assert(CapturedStmtInfo->lookup(VD) && 656 "Copyin threadprivates should have been captured!"); 657 DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(), 658 VK_LValue, (*IRef)->getExprLoc()); 659 MasterAddr = EmitLValue(&DRE).getAddress(); 660 LocalDeclMap.erase(VD); 661 } else { 662 MasterAddr = 663 Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) 664 : CGM.GetAddrOfGlobal(VD), 665 getContext().getDeclAlign(VD)); 666 } 667 // Get the address of the threadprivate variable. 668 Address PrivateAddr = EmitLValue(*IRef).getAddress(); 669 if (CopiedVars.size() == 1) { 670 // At first check if current thread is a master thread. If it is, no 671 // need to copy data. 672 CopyBegin = createBasicBlock("copyin.not.master"); 673 CopyEnd = createBasicBlock("copyin.not.master.end"); 674 Builder.CreateCondBr( 675 Builder.CreateICmpNE( 676 Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy), 677 Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)), 678 CopyBegin, CopyEnd); 679 EmitBlock(CopyBegin); 680 } 681 auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 682 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 683 EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp); 684 } 685 ++IRef; 686 ++ISrcRef; 687 ++IDestRef; 688 } 689 } 690 if (CopyEnd) { 691 // Exit out of copying procedure for non-master thread. 692 EmitBlock(CopyEnd, /*IsFinished=*/true); 693 return true; 694 } 695 return false; 696 } 697 698 bool CodeGenFunction::EmitOMPLastprivateClauseInit( 699 const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { 700 if (!HaveInsertPoint()) 701 return false; 702 bool HasAtLeastOneLastprivate = false; 703 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; 704 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 705 HasAtLeastOneLastprivate = true; 706 auto IRef = C->varlist_begin(); 707 auto IDestRef = C->destination_exprs().begin(); 708 for (auto *IInit : C->private_copies()) { 709 // Keep the address of the original variable for future update at the end 710 // of the loop. 711 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 712 if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { 713 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 714 PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address { 715 DeclRefExpr DRE( 716 const_cast<VarDecl *>(OrigVD), 717 /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( 718 OrigVD) != nullptr, 719 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 720 return EmitLValue(&DRE).getAddress(); 721 }); 722 // Check if the variable is also a firstprivate: in this case IInit is 723 // not generated. Initialization of this variable will happen in codegen 724 // for 'firstprivate' clause. 725 if (IInit) { 726 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 727 bool IsRegistered = 728 PrivateScope.addPrivate(OrigVD, [&]() -> Address { 729 // Emit private VarDecl with copy init. 730 EmitDecl(*VD); 731 return GetAddrOfLocalVar(VD); 732 }); 733 assert(IsRegistered && 734 "lastprivate var already registered as private"); 735 (void)IsRegistered; 736 } 737 } 738 ++IRef; 739 ++IDestRef; 740 } 741 } 742 return HasAtLeastOneLastprivate; 743 } 744 745 void CodeGenFunction::EmitOMPLastprivateClauseFinal( 746 const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) { 747 if (!HaveInsertPoint()) 748 return; 749 // Emit following code: 750 // if (<IsLastIterCond>) { 751 // orig_var1 = private_orig_var1; 752 // ... 753 // orig_varn = private_orig_varn; 754 // } 755 llvm::BasicBlock *ThenBB = nullptr; 756 llvm::BasicBlock *DoneBB = nullptr; 757 if (IsLastIterCond) { 758 ThenBB = createBasicBlock(".omp.lastprivate.then"); 759 DoneBB = createBasicBlock(".omp.lastprivate.done"); 760 Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB); 761 EmitBlock(ThenBB); 762 } 763 llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates; 764 if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) { 765 auto IC = LoopDirective->counters().begin(); 766 for (auto F : LoopDirective->finals()) { 767 auto *D = cast<DeclRefExpr>(*IC)->getDecl()->getCanonicalDecl(); 768 LoopCountersAndUpdates[D] = F; 769 ++IC; 770 } 771 } 772 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; 773 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 774 auto IRef = C->varlist_begin(); 775 auto ISrcRef = C->source_exprs().begin(); 776 auto IDestRef = C->destination_exprs().begin(); 777 for (auto *AssignOp : C->assignment_ops()) { 778 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 779 QualType Type = PrivateVD->getType(); 780 auto *CanonicalVD = PrivateVD->getCanonicalDecl(); 781 if (AlreadyEmittedVars.insert(CanonicalVD).second) { 782 // If lastprivate variable is a loop control variable for loop-based 783 // directive, update its value before copyin back to original 784 // variable. 785 if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) 786 EmitIgnoredExpr(UpExpr); 787 auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 788 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 789 // Get the address of the original variable. 790 Address OriginalAddr = GetAddrOfLocalVar(DestVD); 791 // Get the address of the private variable. 792 Address PrivateAddr = GetAddrOfLocalVar(PrivateVD); 793 if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>()) 794 PrivateAddr = 795 Address(Builder.CreateLoad(PrivateAddr), 796 getNaturalTypeAlignment(RefTy->getPointeeType())); 797 EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp); 798 } 799 ++IRef; 800 ++ISrcRef; 801 ++IDestRef; 802 } 803 if (auto *PostUpdate = C->getPostUpdateExpr()) 804 EmitIgnoredExpr(PostUpdate); 805 } 806 if (IsLastIterCond) 807 EmitBlock(DoneBB, /*IsFinished=*/true); 808 } 809 810 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 811 LValue BaseLV, llvm::Value *Addr) { 812 Address Tmp = Address::invalid(); 813 Address TopTmp = Address::invalid(); 814 Address MostTopTmp = Address::invalid(); 815 BaseTy = BaseTy.getNonReferenceType(); 816 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 817 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 818 Tmp = CGF.CreateMemTemp(BaseTy); 819 if (TopTmp.isValid()) 820 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 821 else 822 MostTopTmp = Tmp; 823 TopTmp = Tmp; 824 BaseTy = BaseTy->getPointeeType(); 825 } 826 llvm::Type *Ty = BaseLV.getPointer()->getType(); 827 if (Tmp.isValid()) 828 Ty = Tmp.getElementType(); 829 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 830 if (Tmp.isValid()) { 831 CGF.Builder.CreateStore(Addr, Tmp); 832 return MostTopTmp; 833 } 834 return Address(Addr, BaseLV.getAlignment()); 835 } 836 837 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 838 LValue BaseLV) { 839 BaseTy = BaseTy.getNonReferenceType(); 840 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 841 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 842 if (auto *PtrTy = BaseTy->getAs<PointerType>()) 843 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); 844 else { 845 BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(), 846 BaseTy->castAs<ReferenceType>()); 847 } 848 BaseTy = BaseTy->getPointeeType(); 849 } 850 return CGF.MakeAddrLValue( 851 Address( 852 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 853 BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()), 854 BaseLV.getAlignment()), 855 BaseLV.getType(), BaseLV.getAlignmentSource()); 856 } 857 858 void CodeGenFunction::EmitOMPReductionClauseInit( 859 const OMPExecutableDirective &D, 860 CodeGenFunction::OMPPrivateScope &PrivateScope) { 861 if (!HaveInsertPoint()) 862 return; 863 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 864 auto ILHS = C->lhs_exprs().begin(); 865 auto IRHS = C->rhs_exprs().begin(); 866 auto IPriv = C->privates().begin(); 867 auto IRed = C->reduction_ops().begin(); 868 for (auto IRef : C->varlists()) { 869 auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 870 auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 871 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); 872 auto *DRD = getReductionInit(*IRed); 873 if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) { 874 auto *Base = OASE->getBase()->IgnoreParenImpCasts(); 875 while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 876 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 877 while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 878 Base = TempASE->getBase()->IgnoreParenImpCasts(); 879 auto *DE = cast<DeclRefExpr>(Base); 880 auto *OrigVD = cast<VarDecl>(DE->getDecl()); 881 auto OASELValueLB = EmitOMPArraySectionExpr(OASE); 882 auto OASELValueUB = 883 EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 884 auto OriginalBaseLValue = EmitLValue(DE); 885 LValue BaseLValue = 886 loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(), 887 OriginalBaseLValue); 888 // Store the address of the original variable associated with the LHS 889 // implicit variable. 890 PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address { 891 return OASELValueLB.getAddress(); 892 }); 893 // Emit reduction copy. 894 bool IsRegistered = PrivateScope.addPrivate( 895 OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB, 896 OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address { 897 // Emit VarDecl with copy init for arrays. 898 // Get the address of the original variable captured in current 899 // captured region. 900 auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(), 901 OASELValueLB.getPointer()); 902 Size = Builder.CreateNUWAdd( 903 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 904 CodeGenFunction::OpaqueValueMapping OpaqueMap( 905 *this, cast<OpaqueValueExpr>( 906 getContext() 907 .getAsVariableArrayType(PrivateVD->getType()) 908 ->getSizeExpr()), 909 RValue::get(Size)); 910 EmitVariablyModifiedType(PrivateVD->getType()); 911 auto Emission = EmitAutoVarAlloca(*PrivateVD); 912 auto Addr = Emission.getAllocatedAddress(); 913 auto *Init = PrivateVD->getInit(); 914 EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), 915 DRD ? *IRed : Init, 916 OASELValueLB.getAddress()); 917 EmitAutoVarCleanups(Emission); 918 // Emit private VarDecl with reduction init. 919 auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), 920 OASELValueLB.getPointer()); 921 auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); 922 return castToBase(*this, OrigVD->getType(), 923 OASELValueLB.getType(), OriginalBaseLValue, 924 Ptr); 925 }); 926 assert(IsRegistered && "private var already registered as private"); 927 // Silence the warning about unused variable. 928 (void)IsRegistered; 929 PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { 930 return GetAddrOfLocalVar(PrivateVD); 931 }); 932 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) { 933 auto *Base = ASE->getBase()->IgnoreParenImpCasts(); 934 while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 935 Base = TempASE->getBase()->IgnoreParenImpCasts(); 936 auto *DE = cast<DeclRefExpr>(Base); 937 auto *OrigVD = cast<VarDecl>(DE->getDecl()); 938 auto ASELValue = EmitLValue(ASE); 939 auto OriginalBaseLValue = EmitLValue(DE); 940 LValue BaseLValue = loadToBegin( 941 *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue); 942 // Store the address of the original variable associated with the LHS 943 // implicit variable. 944 PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address { 945 return ASELValue.getAddress(); 946 }); 947 // Emit reduction copy. 948 bool IsRegistered = PrivateScope.addPrivate( 949 OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue, 950 OriginalBaseLValue, DRD, IRed]() -> Address { 951 // Emit private VarDecl with reduction init. 952 AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); 953 auto Addr = Emission.getAllocatedAddress(); 954 if (DRD) { 955 emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, 956 ASELValue.getAddress(), 957 ASELValue.getType()); 958 } else 959 EmitAutoVarInit(Emission); 960 EmitAutoVarCleanups(Emission); 961 auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), 962 ASELValue.getPointer()); 963 auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); 964 return castToBase(*this, OrigVD->getType(), ASELValue.getType(), 965 OriginalBaseLValue, Ptr); 966 }); 967 assert(IsRegistered && "private var already registered as private"); 968 // Silence the warning about unused variable. 969 (void)IsRegistered; 970 PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { 971 return Builder.CreateElementBitCast( 972 GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()), 973 "rhs.begin"); 974 }); 975 } else { 976 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); 977 QualType Type = PrivateVD->getType(); 978 if (getContext().getAsArrayType(Type)) { 979 // Store the address of the original variable associated with the LHS 980 // implicit variable. 981 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 982 CapturedStmtInfo->lookup(OrigVD) != nullptr, 983 IRef->getType(), VK_LValue, IRef->getExprLoc()); 984 Address OriginalAddr = EmitLValue(&DRE).getAddress(); 985 PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr, 986 LHSVD]() -> Address { 987 OriginalAddr = Builder.CreateElementBitCast( 988 OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); 989 return OriginalAddr; 990 }); 991 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 992 if (Type->isVariablyModifiedType()) { 993 CodeGenFunction::OpaqueValueMapping OpaqueMap( 994 *this, cast<OpaqueValueExpr>( 995 getContext() 996 .getAsVariableArrayType(PrivateVD->getType()) 997 ->getSizeExpr()), 998 RValue::get( 999 getTypeSize(OrigVD->getType().getNonReferenceType()))); 1000 EmitVariablyModifiedType(Type); 1001 } 1002 auto Emission = EmitAutoVarAlloca(*PrivateVD); 1003 auto Addr = Emission.getAllocatedAddress(); 1004 auto *Init = PrivateVD->getInit(); 1005 EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), 1006 DRD ? *IRed : Init, OriginalAddr); 1007 EmitAutoVarCleanups(Emission); 1008 return Emission.getAllocatedAddress(); 1009 }); 1010 assert(IsRegistered && "private var already registered as private"); 1011 // Silence the warning about unused variable. 1012 (void)IsRegistered; 1013 PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { 1014 return Builder.CreateElementBitCast( 1015 GetAddrOfLocalVar(PrivateVD), 1016 ConvertTypeForMem(RHSVD->getType()), "rhs.begin"); 1017 }); 1018 } else { 1019 // Store the address of the original variable associated with the LHS 1020 // implicit variable. 1021 Address OriginalAddr = Address::invalid(); 1022 PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef, 1023 &OriginalAddr]() -> Address { 1024 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 1025 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1026 IRef->getType(), VK_LValue, IRef->getExprLoc()); 1027 OriginalAddr = EmitLValue(&DRE).getAddress(); 1028 return OriginalAddr; 1029 }); 1030 // Emit reduction copy. 1031 bool IsRegistered = PrivateScope.addPrivate( 1032 OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address { 1033 // Emit private VarDecl with reduction init. 1034 AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); 1035 auto Addr = Emission.getAllocatedAddress(); 1036 if (DRD) { 1037 emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, 1038 OriginalAddr, 1039 PrivateVD->getType()); 1040 } else 1041 EmitAutoVarInit(Emission); 1042 EmitAutoVarCleanups(Emission); 1043 return Addr; 1044 }); 1045 assert(IsRegistered && "private var already registered as private"); 1046 // Silence the warning about unused variable. 1047 (void)IsRegistered; 1048 PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { 1049 return GetAddrOfLocalVar(PrivateVD); 1050 }); 1051 } 1052 } 1053 ++ILHS; 1054 ++IRHS; 1055 ++IPriv; 1056 ++IRed; 1057 } 1058 } 1059 } 1060 1061 void CodeGenFunction::EmitOMPReductionClauseFinal( 1062 const OMPExecutableDirective &D) { 1063 if (!HaveInsertPoint()) 1064 return; 1065 llvm::SmallVector<const Expr *, 8> Privates; 1066 llvm::SmallVector<const Expr *, 8> LHSExprs; 1067 llvm::SmallVector<const Expr *, 8> RHSExprs; 1068 llvm::SmallVector<const Expr *, 8> ReductionOps; 1069 bool HasAtLeastOneReduction = false; 1070 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1071 HasAtLeastOneReduction = true; 1072 Privates.append(C->privates().begin(), C->privates().end()); 1073 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 1074 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 1075 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 1076 } 1077 if (HasAtLeastOneReduction) { 1078 // Emit nowait reduction if nowait clause is present or directive is a 1079 // parallel directive (it always has implicit barrier). 1080 CGM.getOpenMPRuntime().emitReduction( 1081 *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps, 1082 D.getSingleClause<OMPNowaitClause>() || 1083 isOpenMPParallelDirective(D.getDirectiveKind()) || 1084 D.getDirectiveKind() == OMPD_simd, 1085 D.getDirectiveKind() == OMPD_simd); 1086 } 1087 } 1088 1089 static void emitPostUpdateForReductionClause( 1090 CodeGenFunction &CGF, const OMPExecutableDirective &D, 1091 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1092 if (!CGF.HaveInsertPoint()) 1093 return; 1094 llvm::BasicBlock *DoneBB = nullptr; 1095 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1096 if (auto *PostUpdate = C->getPostUpdateExpr()) { 1097 if (!DoneBB) { 1098 if (auto *Cond = CondGen(CGF)) { 1099 // If the first post-update expression is found, emit conditional 1100 // block if it was requested. 1101 auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu"); 1102 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done"); 1103 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1104 CGF.EmitBlock(ThenBB); 1105 } 1106 } 1107 CGF.EmitIgnoredExpr(PostUpdate); 1108 } 1109 } 1110 if (DoneBB) 1111 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 1112 } 1113 1114 static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, 1115 const OMPExecutableDirective &S, 1116 OpenMPDirectiveKind InnermostKind, 1117 const RegionCodeGenTy &CodeGen) { 1118 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 1119 auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). 1120 emitParallelOrTeamsOutlinedFunction(S, 1121 *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 1122 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { 1123 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 1124 auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 1125 /*IgnoreResultAssign*/ true); 1126 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( 1127 CGF, NumThreads, NumThreadsClause->getLocStart()); 1128 } 1129 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { 1130 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); 1131 CGF.CGM.getOpenMPRuntime().emitProcBindClause( 1132 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart()); 1133 } 1134 const Expr *IfCond = nullptr; 1135 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 1136 if (C->getNameModifier() == OMPD_unknown || 1137 C->getNameModifier() == OMPD_parallel) { 1138 IfCond = C->getCondition(); 1139 break; 1140 } 1141 } 1142 1143 OMPLexicalScope Scope(CGF, S); 1144 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 1145 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 1146 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, 1147 CapturedVars, IfCond); 1148 } 1149 1150 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { 1151 // Emit parallel region as a standalone region. 1152 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1153 OMPPrivateScope PrivateScope(CGF); 1154 bool Copyins = CGF.EmitOMPCopyinClause(S); 1155 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 1156 if (Copyins) { 1157 // Emit implicit barrier to synchronize threads and avoid data races on 1158 // propagation master's thread values of threadprivate variables to local 1159 // instances of that variables of all other implicit threads. 1160 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 1161 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 1162 /*ForceSimpleCall=*/true); 1163 } 1164 CGF.EmitOMPPrivateClause(S, PrivateScope); 1165 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 1166 (void)PrivateScope.Privatize(); 1167 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1168 CGF.EmitOMPReductionClauseFinal(S); 1169 }; 1170 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen); 1171 emitPostUpdateForReductionClause( 1172 *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1173 } 1174 1175 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, 1176 JumpDest LoopExit) { 1177 RunCleanupsScope BodyScope(*this); 1178 // Update counters values on current iteration. 1179 for (auto I : D.updates()) { 1180 EmitIgnoredExpr(I); 1181 } 1182 // Update the linear variables. 1183 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1184 for (auto U : C->updates()) { 1185 EmitIgnoredExpr(U); 1186 } 1187 } 1188 1189 // On a continue in the body, jump to the end. 1190 auto Continue = getJumpDestInCurrentScope("omp.body.continue"); 1191 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1192 // Emit loop body. 1193 EmitStmt(D.getBody()); 1194 // The end (updates/cleanups). 1195 EmitBlock(Continue.getBlock()); 1196 BreakContinueStack.pop_back(); 1197 } 1198 1199 void CodeGenFunction::EmitOMPInnerLoop( 1200 const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, 1201 const Expr *IncExpr, 1202 const llvm::function_ref<void(CodeGenFunction &)> &BodyGen, 1203 const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) { 1204 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); 1205 1206 // Start the loop with a block that tests the condition. 1207 auto CondBlock = createBasicBlock("omp.inner.for.cond"); 1208 EmitBlock(CondBlock); 1209 LoopStack.push(CondBlock); 1210 1211 // If there are any cleanups between here and the loop-exit scope, 1212 // create a block to stage a loop exit along. 1213 auto ExitBlock = LoopExit.getBlock(); 1214 if (RequiresCleanup) 1215 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); 1216 1217 auto LoopBody = createBasicBlock("omp.inner.for.body"); 1218 1219 // Emit condition. 1220 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); 1221 if (ExitBlock != LoopExit.getBlock()) { 1222 EmitBlock(ExitBlock); 1223 EmitBranchThroughCleanup(LoopExit); 1224 } 1225 1226 EmitBlock(LoopBody); 1227 incrementProfileCounter(&S); 1228 1229 // Create a block for the increment. 1230 auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); 1231 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1232 1233 BodyGen(*this); 1234 1235 // Emit "IV = IV + 1" and a back-edge to the condition block. 1236 EmitBlock(Continue.getBlock()); 1237 EmitIgnoredExpr(IncExpr); 1238 PostIncGen(*this); 1239 BreakContinueStack.pop_back(); 1240 EmitBranch(CondBlock); 1241 LoopStack.pop(); 1242 // Emit the fall-through block. 1243 EmitBlock(LoopExit.getBlock()); 1244 } 1245 1246 void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { 1247 if (!HaveInsertPoint()) 1248 return; 1249 // Emit inits for the linear variables. 1250 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1251 for (auto Init : C->inits()) { 1252 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); 1253 if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { 1254 AutoVarEmission Emission = EmitAutoVarAlloca(*VD); 1255 auto *OrigVD = cast<VarDecl>(Ref->getDecl()); 1256 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 1257 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1258 VD->getInit()->getType(), VK_LValue, 1259 VD->getInit()->getExprLoc()); 1260 EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), 1261 VD->getType()), 1262 /*capturedByInit=*/false); 1263 EmitAutoVarCleanups(Emission); 1264 } else 1265 EmitVarDecl(*VD); 1266 } 1267 // Emit the linear steps for the linear clauses. 1268 // If a step is not constant, it is pre-calculated before the loop. 1269 if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep())) 1270 if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) { 1271 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); 1272 // Emit calculation of the linear step. 1273 EmitIgnoredExpr(CS); 1274 } 1275 } 1276 } 1277 1278 static void emitLinearClauseFinal( 1279 CodeGenFunction &CGF, const OMPLoopDirective &D, 1280 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1281 if (!CGF.HaveInsertPoint()) 1282 return; 1283 llvm::BasicBlock *DoneBB = nullptr; 1284 // Emit the final values of the linear variables. 1285 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1286 auto IC = C->varlist_begin(); 1287 for (auto F : C->finals()) { 1288 if (!DoneBB) { 1289 if (auto *Cond = CondGen(CGF)) { 1290 // If the first post-update expression is found, emit conditional 1291 // block if it was requested. 1292 auto *ThenBB = CGF.createBasicBlock(".omp.linear.pu"); 1293 DoneBB = CGF.createBasicBlock(".omp.linear.pu.done"); 1294 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1295 CGF.EmitBlock(ThenBB); 1296 } 1297 } 1298 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); 1299 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 1300 CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, 1301 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); 1302 Address OrigAddr = CGF.EmitLValue(&DRE).getAddress(); 1303 CodeGenFunction::OMPPrivateScope VarScope(CGF); 1304 VarScope.addPrivate(OrigVD, 1305 [OrigAddr]() -> Address { return OrigAddr; }); 1306 (void)VarScope.Privatize(); 1307 CGF.EmitIgnoredExpr(F); 1308 ++IC; 1309 } 1310 if (auto *PostUpdate = C->getPostUpdateExpr()) 1311 CGF.EmitIgnoredExpr(PostUpdate); 1312 } 1313 if (DoneBB) 1314 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 1315 } 1316 1317 static void emitAlignedClause(CodeGenFunction &CGF, 1318 const OMPExecutableDirective &D) { 1319 if (!CGF.HaveInsertPoint()) 1320 return; 1321 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { 1322 unsigned ClauseAlignment = 0; 1323 if (auto AlignmentExpr = Clause->getAlignment()) { 1324 auto AlignmentCI = 1325 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); 1326 ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue()); 1327 } 1328 for (auto E : Clause->varlists()) { 1329 unsigned Alignment = ClauseAlignment; 1330 if (Alignment == 0) { 1331 // OpenMP [2.8.1, Description] 1332 // If no optional parameter is specified, implementation-defined default 1333 // alignments for SIMD instructions on the target platforms are assumed. 1334 Alignment = 1335 CGF.getContext() 1336 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( 1337 E->getType()->getPointeeType())) 1338 .getQuantity(); 1339 } 1340 assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) && 1341 "alignment is not power of 2"); 1342 if (Alignment != 0) { 1343 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 1344 CGF.EmitAlignmentAssumption(PtrValue, Alignment); 1345 } 1346 } 1347 } 1348 } 1349 1350 static void emitPrivateLoopCounters(CodeGenFunction &CGF, 1351 CodeGenFunction::OMPPrivateScope &LoopScope, 1352 ArrayRef<Expr *> Counters, 1353 ArrayRef<Expr *> PrivateCounters) { 1354 if (!CGF.HaveInsertPoint()) 1355 return; 1356 auto I = PrivateCounters.begin(); 1357 for (auto *E : Counters) { 1358 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1359 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 1360 Address Addr = Address::invalid(); 1361 (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address { 1362 // Emit var without initialization. 1363 auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD); 1364 CGF.EmitAutoVarCleanups(VarEmission); 1365 Addr = VarEmission.getAllocatedAddress(); 1366 return Addr; 1367 }); 1368 (void)LoopScope.addPrivate(VD, [&]() -> Address { return Addr; }); 1369 ++I; 1370 } 1371 } 1372 1373 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 1374 const Expr *Cond, llvm::BasicBlock *TrueBlock, 1375 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 1376 if (!CGF.HaveInsertPoint()) 1377 return; 1378 { 1379 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 1380 emitPrivateLoopCounters(CGF, PreCondScope, S.counters(), 1381 S.private_counters()); 1382 (void)PreCondScope.Privatize(); 1383 // Get initial values of real counters. 1384 for (auto I : S.inits()) { 1385 CGF.EmitIgnoredExpr(I); 1386 } 1387 } 1388 // Check that loop is executed at least one time. 1389 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); 1390 } 1391 1392 static void 1393 emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D, 1394 CodeGenFunction::OMPPrivateScope &PrivateScope) { 1395 if (!CGF.HaveInsertPoint()) 1396 return; 1397 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1398 auto CurPrivate = C->privates().begin(); 1399 for (auto *E : C->varlists()) { 1400 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1401 auto *PrivateVD = 1402 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); 1403 bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address { 1404 // Emit private VarDecl with copy init. 1405 CGF.EmitVarDecl(*PrivateVD); 1406 return CGF.GetAddrOfLocalVar(PrivateVD); 1407 }); 1408 assert(IsRegistered && "linear var already registered as private"); 1409 // Silence the warning about unused variable. 1410 (void)IsRegistered; 1411 ++CurPrivate; 1412 } 1413 } 1414 } 1415 1416 static void emitSimdlenSafelenClause(CodeGenFunction &CGF, 1417 const OMPExecutableDirective &D, 1418 bool IsMonotonic) { 1419 if (!CGF.HaveInsertPoint()) 1420 return; 1421 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { 1422 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 1423 /*ignoreResult=*/true); 1424 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1425 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1426 // In presence of finite 'safelen', it may be unsafe to mark all 1427 // the memory instructions parallel, because loop-carried 1428 // dependences of 'safelen' iterations are possible. 1429 if (!IsMonotonic) 1430 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); 1431 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { 1432 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), 1433 /*ignoreResult=*/true); 1434 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1435 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1436 // In presence of finite 'safelen', it may be unsafe to mark all 1437 // the memory instructions parallel, because loop-carried 1438 // dependences of 'safelen' iterations are possible. 1439 CGF.LoopStack.setParallel(false); 1440 } 1441 } 1442 1443 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, 1444 bool IsMonotonic) { 1445 // Walk clauses and process safelen/lastprivate. 1446 LoopStack.setParallel(!IsMonotonic); 1447 LoopStack.setVectorizeEnable(true); 1448 emitSimdlenSafelenClause(*this, D, IsMonotonic); 1449 } 1450 1451 void CodeGenFunction::EmitOMPSimdFinal( 1452 const OMPLoopDirective &D, 1453 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1454 if (!HaveInsertPoint()) 1455 return; 1456 llvm::BasicBlock *DoneBB = nullptr; 1457 auto IC = D.counters().begin(); 1458 for (auto F : D.finals()) { 1459 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); 1460 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) { 1461 if (!DoneBB) { 1462 if (auto *Cond = CondGen(*this)) { 1463 // If the first post-update expression is found, emit conditional 1464 // block if it was requested. 1465 auto *ThenBB = createBasicBlock(".omp.final.then"); 1466 DoneBB = createBasicBlock(".omp.final.done"); 1467 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1468 EmitBlock(ThenBB); 1469 } 1470 } 1471 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 1472 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1473 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); 1474 Address OrigAddr = EmitLValue(&DRE).getAddress(); 1475 OMPPrivateScope VarScope(*this); 1476 VarScope.addPrivate(OrigVD, 1477 [OrigAddr]() -> Address { return OrigAddr; }); 1478 (void)VarScope.Privatize(); 1479 EmitIgnoredExpr(F); 1480 } 1481 ++IC; 1482 } 1483 if (DoneBB) 1484 EmitBlock(DoneBB, /*IsFinished=*/true); 1485 } 1486 1487 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 1488 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1489 OMPLoopScope PreInitScope(CGF, S); 1490 // if (PreCond) { 1491 // for (IV in 0..LastIteration) BODY; 1492 // <Final counter/linear vars updates>; 1493 // } 1494 // 1495 1496 // Emit: if (PreCond) - begin. 1497 // If the condition constant folds and can be elided, avoid emitting the 1498 // whole loop. 1499 bool CondConstant; 1500 llvm::BasicBlock *ContBlock = nullptr; 1501 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1502 if (!CondConstant) 1503 return; 1504 } else { 1505 auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); 1506 ContBlock = CGF.createBasicBlock("simd.if.end"); 1507 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 1508 CGF.getProfileCount(&S)); 1509 CGF.EmitBlock(ThenBlock); 1510 CGF.incrementProfileCounter(&S); 1511 } 1512 1513 // Emit the loop iteration variable. 1514 const Expr *IVExpr = S.getIterationVariable(); 1515 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 1516 CGF.EmitVarDecl(*IVDecl); 1517 CGF.EmitIgnoredExpr(S.getInit()); 1518 1519 // Emit the iterations count variable. 1520 // If it is not a variable, Sema decided to calculate iterations count on 1521 // each iteration (e.g., it is foldable into a constant). 1522 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1523 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1524 // Emit calculation of the iterations count. 1525 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 1526 } 1527 1528 CGF.EmitOMPSimdInit(S); 1529 1530 emitAlignedClause(CGF, S); 1531 CGF.EmitOMPLinearClauseInit(S); 1532 { 1533 OMPPrivateScope LoopScope(CGF); 1534 emitPrivateLoopCounters(CGF, LoopScope, S.counters(), 1535 S.private_counters()); 1536 emitPrivateLinearVars(CGF, S, LoopScope); 1537 CGF.EmitOMPPrivateClause(S, LoopScope); 1538 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1539 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1540 (void)LoopScope.Privatize(); 1541 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1542 S.getInc(), 1543 [&S](CodeGenFunction &CGF) { 1544 CGF.EmitOMPLoopBody(S, JumpDest()); 1545 CGF.EmitStopPoint(&S); 1546 }, 1547 [](CodeGenFunction &) {}); 1548 // Emit final copy of the lastprivate variables at the end of loops. 1549 if (HasLastprivateClause) 1550 CGF.EmitOMPLastprivateClauseFinal(S); 1551 CGF.EmitOMPReductionClauseFinal(S); 1552 emitPostUpdateForReductionClause( 1553 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1554 } 1555 CGF.EmitOMPSimdFinal( 1556 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1557 emitLinearClauseFinal( 1558 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1559 // Emit: if (PreCond) - end. 1560 if (ContBlock) { 1561 CGF.EmitBranch(ContBlock); 1562 CGF.EmitBlock(ContBlock, true); 1563 } 1564 }; 1565 OMPLexicalScope Scope(*this, S); 1566 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1567 } 1568 1569 void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, 1570 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1571 Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { 1572 auto &RT = CGM.getOpenMPRuntime(); 1573 1574 const Expr *IVExpr = S.getIterationVariable(); 1575 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1576 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1577 1578 auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 1579 1580 // Start the loop with a block that tests the condition. 1581 auto CondBlock = createBasicBlock("omp.dispatch.cond"); 1582 EmitBlock(CondBlock); 1583 LoopStack.push(CondBlock); 1584 1585 llvm::Value *BoolCondVal = nullptr; 1586 if (!DynamicOrOrdered) { 1587 // UB = min(UB, GlobalUB) 1588 EmitIgnoredExpr(S.getEnsureUpperBound()); 1589 // IV = LB 1590 EmitIgnoredExpr(S.getInit()); 1591 // IV < UB 1592 BoolCondVal = EvaluateExprAsBool(S.getCond()); 1593 } else { 1594 BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, 1595 IL, LB, UB, ST); 1596 } 1597 1598 // If there are any cleanups between here and the loop-exit scope, 1599 // create a block to stage a loop exit along. 1600 auto ExitBlock = LoopExit.getBlock(); 1601 if (LoopScope.requiresCleanups()) 1602 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 1603 1604 auto LoopBody = createBasicBlock("omp.dispatch.body"); 1605 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 1606 if (ExitBlock != LoopExit.getBlock()) { 1607 EmitBlock(ExitBlock); 1608 EmitBranchThroughCleanup(LoopExit); 1609 } 1610 EmitBlock(LoopBody); 1611 1612 // Emit "IV = LB" (in case of static schedule, we have already calculated new 1613 // LB for loop condition and emitted it above). 1614 if (DynamicOrOrdered) 1615 EmitIgnoredExpr(S.getInit()); 1616 1617 // Create a block for the increment. 1618 auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 1619 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1620 1621 // Generate !llvm.loop.parallel metadata for loads and stores for loops 1622 // with dynamic/guided scheduling and without ordered clause. 1623 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 1624 LoopStack.setParallel(!IsMonotonic); 1625 else 1626 EmitOMPSimdInit(S, IsMonotonic); 1627 1628 SourceLocation Loc = S.getLocStart(); 1629 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), 1630 [&S, LoopExit](CodeGenFunction &CGF) { 1631 CGF.EmitOMPLoopBody(S, LoopExit); 1632 CGF.EmitStopPoint(&S); 1633 }, 1634 [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) { 1635 if (Ordered) { 1636 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd( 1637 CGF, Loc, IVSize, IVSigned); 1638 } 1639 }); 1640 1641 EmitBlock(Continue.getBlock()); 1642 BreakContinueStack.pop_back(); 1643 if (!DynamicOrOrdered) { 1644 // Emit "LB = LB + Stride", "UB = UB + Stride". 1645 EmitIgnoredExpr(S.getNextLowerBound()); 1646 EmitIgnoredExpr(S.getNextUpperBound()); 1647 } 1648 1649 EmitBranch(CondBlock); 1650 LoopStack.pop(); 1651 // Emit the fall-through block. 1652 EmitBlock(LoopExit.getBlock()); 1653 1654 // Tell the runtime we are done. 1655 if (!DynamicOrOrdered) 1656 RT.emitForStaticFinish(*this, S.getLocEnd()); 1657 1658 } 1659 1660 void CodeGenFunction::EmitOMPForOuterLoop( 1661 OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic, 1662 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1663 Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { 1664 auto &RT = CGM.getOpenMPRuntime(); 1665 1666 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 1667 const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind); 1668 1669 assert((Ordered || 1670 !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) && 1671 "static non-chunked schedule does not need outer loop"); 1672 1673 // Emit outer loop. 1674 // 1675 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1676 // When schedule(dynamic,chunk_size) is specified, the iterations are 1677 // distributed to threads in the team in chunks as the threads request them. 1678 // Each thread executes a chunk of iterations, then requests another chunk, 1679 // until no chunks remain to be distributed. Each chunk contains chunk_size 1680 // iterations, except for the last chunk to be distributed, which may have 1681 // fewer iterations. When no chunk_size is specified, it defaults to 1. 1682 // 1683 // When schedule(guided,chunk_size) is specified, the iterations are assigned 1684 // to threads in the team in chunks as the executing threads request them. 1685 // Each thread executes a chunk of iterations, then requests another chunk, 1686 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 1687 // each chunk is proportional to the number of unassigned iterations divided 1688 // by the number of threads in the team, decreasing to 1. For a chunk_size 1689 // with value k (greater than 1), the size of each chunk is determined in the 1690 // same way, with the restriction that the chunks do not contain fewer than k 1691 // iterations (except for the last chunk to be assigned, which may have fewer 1692 // than k iterations). 1693 // 1694 // When schedule(auto) is specified, the decision regarding scheduling is 1695 // delegated to the compiler and/or runtime system. The programmer gives the 1696 // implementation the freedom to choose any possible mapping of iterations to 1697 // threads in the team. 1698 // 1699 // When schedule(runtime) is specified, the decision regarding scheduling is 1700 // deferred until run time, and the schedule and chunk size are taken from the 1701 // run-sched-var ICV. If the ICV is set to auto, the schedule is 1702 // implementation defined 1703 // 1704 // while(__kmpc_dispatch_next(&LB, &UB)) { 1705 // idx = LB; 1706 // while (idx <= UB) { BODY; ++idx; 1707 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. 1708 // } // inner loop 1709 // } 1710 // 1711 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1712 // When schedule(static, chunk_size) is specified, iterations are divided into 1713 // chunks of size chunk_size, and the chunks are assigned to the threads in 1714 // the team in a round-robin fashion in the order of the thread number. 1715 // 1716 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { 1717 // while (idx <= UB) { BODY; ++idx; } // inner loop 1718 // LB = LB + ST; 1719 // UB = UB + ST; 1720 // } 1721 // 1722 1723 const Expr *IVExpr = S.getIterationVariable(); 1724 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1725 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1726 1727 if (DynamicOrOrdered) { 1728 llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration()); 1729 RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, 1730 IVSize, IVSigned, Ordered, UBVal, Chunk); 1731 } else { 1732 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, 1733 Ordered, IL, LB, UB, ST, Chunk); 1734 } 1735 1736 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB, 1737 ST, IL, Chunk); 1738 } 1739 1740 void CodeGenFunction::EmitOMPDistributeOuterLoop( 1741 OpenMPDistScheduleClauseKind ScheduleKind, 1742 const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, 1743 Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { 1744 1745 auto &RT = CGM.getOpenMPRuntime(); 1746 1747 // Emit outer loop. 1748 // Same behavior as a OMPForOuterLoop, except that schedule cannot be 1749 // dynamic 1750 // 1751 1752 const Expr *IVExpr = S.getIterationVariable(); 1753 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1754 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1755 1756 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, 1757 IVSize, IVSigned, /* Ordered = */ false, 1758 IL, LB, UB, ST, Chunk); 1759 1760 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, 1761 S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk); 1762 } 1763 1764 /// \brief Emit a helper variable and return corresponding lvalue. 1765 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 1766 const DeclRefExpr *Helper) { 1767 auto VDecl = cast<VarDecl>(Helper->getDecl()); 1768 CGF.EmitVarDecl(*VDecl); 1769 return CGF.EmitLValue(Helper); 1770 } 1771 1772 namespace { 1773 struct ScheduleKindModifiersTy { 1774 OpenMPScheduleClauseKind Kind; 1775 OpenMPScheduleClauseModifier M1; 1776 OpenMPScheduleClauseModifier M2; 1777 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, 1778 OpenMPScheduleClauseModifier M1, 1779 OpenMPScheduleClauseModifier M2) 1780 : Kind(Kind), M1(M1), M2(M2) {} 1781 }; 1782 } // namespace 1783 1784 bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { 1785 // Emit the loop iteration variable. 1786 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 1787 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 1788 EmitVarDecl(*IVDecl); 1789 1790 // Emit the iterations count variable. 1791 // If it is not a variable, Sema decided to calculate iterations count on each 1792 // iteration (e.g., it is foldable into a constant). 1793 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1794 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1795 // Emit calculation of the iterations count. 1796 EmitIgnoredExpr(S.getCalcLastIteration()); 1797 } 1798 1799 auto &RT = CGM.getOpenMPRuntime(); 1800 1801 bool HasLastprivateClause; 1802 // Check pre-condition. 1803 { 1804 OMPLoopScope PreInitScope(*this, S); 1805 // Skip the entire loop if we don't meet the precondition. 1806 // If the condition constant folds and can be elided, avoid emitting the 1807 // whole loop. 1808 bool CondConstant; 1809 llvm::BasicBlock *ContBlock = nullptr; 1810 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1811 if (!CondConstant) 1812 return false; 1813 } else { 1814 auto *ThenBlock = createBasicBlock("omp.precond.then"); 1815 ContBlock = createBasicBlock("omp.precond.end"); 1816 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 1817 getProfileCount(&S)); 1818 EmitBlock(ThenBlock); 1819 incrementProfileCounter(&S); 1820 } 1821 1822 emitAlignedClause(*this, S); 1823 EmitOMPLinearClauseInit(S); 1824 // Emit helper vars inits. 1825 LValue LB = 1826 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); 1827 LValue UB = 1828 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); 1829 LValue ST = 1830 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 1831 LValue IL = 1832 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 1833 1834 // Emit 'then' code. 1835 { 1836 OMPPrivateScope LoopScope(*this); 1837 if (EmitOMPFirstprivateClause(S, LoopScope)) { 1838 // Emit implicit barrier to synchronize threads and avoid data races on 1839 // initialization of firstprivate variables and post-update of 1840 // lastprivate variables. 1841 CGM.getOpenMPRuntime().emitBarrierCall( 1842 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 1843 /*ForceSimpleCall=*/true); 1844 } 1845 EmitOMPPrivateClause(S, LoopScope); 1846 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 1847 EmitOMPReductionClauseInit(S, LoopScope); 1848 emitPrivateLoopCounters(*this, LoopScope, S.counters(), 1849 S.private_counters()); 1850 emitPrivateLinearVars(*this, S, LoopScope); 1851 (void)LoopScope.Privatize(); 1852 1853 // Detect the loop schedule kind and chunk. 1854 llvm::Value *Chunk = nullptr; 1855 OpenMPScheduleClauseKind ScheduleKind = OMPC_SCHEDULE_unknown; 1856 OpenMPScheduleClauseModifier M1 = OMPC_SCHEDULE_MODIFIER_unknown; 1857 OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown; 1858 if (auto *C = S.getSingleClause<OMPScheduleClause>()) { 1859 ScheduleKind = C->getScheduleKind(); 1860 M1 = C->getFirstScheduleModifier(); 1861 M2 = C->getSecondScheduleModifier(); 1862 if (const auto *Ch = C->getChunkSize()) { 1863 Chunk = EmitScalarExpr(Ch); 1864 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 1865 S.getIterationVariable()->getType(), 1866 S.getLocStart()); 1867 } 1868 } 1869 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1870 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1871 const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr; 1872 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 1873 // If the static schedule kind is specified or if the ordered clause is 1874 // specified, and if no monotonic modifier is specified, the effect will 1875 // be as if the monotonic modifier was specified. 1876 if (RT.isStaticNonchunked(ScheduleKind, 1877 /* Chunked */ Chunk != nullptr) && 1878 !Ordered) { 1879 if (isOpenMPSimdDirective(S.getDirectiveKind())) 1880 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 1881 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1882 // When no chunk_size is specified, the iteration space is divided into 1883 // chunks that are approximately equal in size, and at most one chunk is 1884 // distributed to each thread. Note that the size of the chunks is 1885 // unspecified in this case. 1886 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, 1887 IVSize, IVSigned, Ordered, 1888 IL.getAddress(), LB.getAddress(), 1889 UB.getAddress(), ST.getAddress()); 1890 auto LoopExit = 1891 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 1892 // UB = min(UB, GlobalUB); 1893 EmitIgnoredExpr(S.getEnsureUpperBound()); 1894 // IV = LB; 1895 EmitIgnoredExpr(S.getInit()); 1896 // while (idx <= UB) { BODY; ++idx; } 1897 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1898 S.getInc(), 1899 [&S, LoopExit](CodeGenFunction &CGF) { 1900 CGF.EmitOMPLoopBody(S, LoopExit); 1901 CGF.EmitStopPoint(&S); 1902 }, 1903 [](CodeGenFunction &) {}); 1904 EmitBlock(LoopExit.getBlock()); 1905 // Tell the runtime we are done. 1906 RT.emitForStaticFinish(*this, S.getLocStart()); 1907 } else { 1908 const bool IsMonotonic = Ordered || 1909 ScheduleKind == OMPC_SCHEDULE_static || 1910 ScheduleKind == OMPC_SCHEDULE_unknown || 1911 M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 1912 M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 1913 // Emit the outer loop, which requests its work chunk [LB..UB] from 1914 // runtime and runs the inner loop to process it. 1915 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 1916 LB.getAddress(), UB.getAddress(), ST.getAddress(), 1917 IL.getAddress(), Chunk); 1918 } 1919 EmitOMPReductionClauseFinal(S); 1920 // Emit post-update of the reduction variables if IsLastIter != 0. 1921 emitPostUpdateForReductionClause( 1922 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 1923 return CGF.Builder.CreateIsNotNull( 1924 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 1925 }); 1926 // Emit final copy of the lastprivate variables if IsLastIter != 0. 1927 if (HasLastprivateClause) 1928 EmitOMPLastprivateClauseFinal( 1929 S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 1930 } 1931 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 1932 EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { 1933 return CGF.Builder.CreateIsNotNull( 1934 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 1935 }); 1936 } 1937 emitLinearClauseFinal(*this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 1938 return CGF.Builder.CreateIsNotNull( 1939 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 1940 }); 1941 // We're now done with the loop, so jump to the continuation block. 1942 if (ContBlock) { 1943 EmitBranch(ContBlock); 1944 EmitBlock(ContBlock, true); 1945 } 1946 } 1947 return HasLastprivateClause; 1948 } 1949 1950 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 1951 bool HasLastprivates = false; 1952 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 1953 PrePostActionTy &) { 1954 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 1955 }; 1956 { 1957 OMPLexicalScope Scope(*this, S); 1958 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 1959 S.hasCancel()); 1960 } 1961 1962 // Emit an implicit barrier at the end. 1963 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 1964 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 1965 } 1966 } 1967 1968 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 1969 bool HasLastprivates = false; 1970 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 1971 PrePostActionTy &) { 1972 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 1973 }; 1974 { 1975 OMPLexicalScope Scope(*this, S); 1976 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1977 } 1978 1979 // Emit an implicit barrier at the end. 1980 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 1981 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 1982 } 1983 } 1984 1985 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 1986 const Twine &Name, 1987 llvm::Value *Init = nullptr) { 1988 auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 1989 if (Init) 1990 CGF.EmitScalarInit(Init, LVal); 1991 return LVal; 1992 } 1993 1994 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 1995 auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); 1996 auto *CS = dyn_cast<CompoundStmt>(Stmt); 1997 bool HasLastprivates = false; 1998 auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF, 1999 PrePostActionTy &) { 2000 auto &C = CGF.CGM.getContext(); 2001 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2002 // Emit helper vars inits. 2003 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 2004 CGF.Builder.getInt32(0)); 2005 auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1) 2006 : CGF.Builder.getInt32(0); 2007 LValue UB = 2008 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 2009 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 2010 CGF.Builder.getInt32(1)); 2011 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 2012 CGF.Builder.getInt32(0)); 2013 // Loop counter. 2014 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 2015 OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2016 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 2017 OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 2018 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 2019 // Generate condition for loop. 2020 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 2021 OK_Ordinary, S.getLocStart(), 2022 /*fpContractable=*/false); 2023 // Increment for loop counter. 2024 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 2025 S.getLocStart()); 2026 auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) { 2027 // Iterate through all sections and emit a switch construct: 2028 // switch (IV) { 2029 // case 0: 2030 // <SectionStmt[0]>; 2031 // break; 2032 // ... 2033 // case <NumSection> - 1: 2034 // <SectionStmt[<NumSection> - 1]>; 2035 // break; 2036 // } 2037 // .omp.sections.exit: 2038 auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 2039 auto *SwitchStmt = CGF.Builder.CreateSwitch( 2040 CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, 2041 CS == nullptr ? 1 : CS->size()); 2042 if (CS) { 2043 unsigned CaseNumber = 0; 2044 for (auto *SubStmt : CS->children()) { 2045 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2046 CGF.EmitBlock(CaseBB); 2047 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 2048 CGF.EmitStmt(SubStmt); 2049 CGF.EmitBranch(ExitBB); 2050 ++CaseNumber; 2051 } 2052 } else { 2053 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2054 CGF.EmitBlock(CaseBB); 2055 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 2056 CGF.EmitStmt(Stmt); 2057 CGF.EmitBranch(ExitBB); 2058 } 2059 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 2060 }; 2061 2062 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2063 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 2064 // Emit implicit barrier to synchronize threads and avoid data races on 2065 // initialization of firstprivate variables and post-update of lastprivate 2066 // variables. 2067 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 2068 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2069 /*ForceSimpleCall=*/true); 2070 } 2071 CGF.EmitOMPPrivateClause(S, LoopScope); 2072 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2073 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2074 (void)LoopScope.Privatize(); 2075 2076 // Emit static non-chunked loop. 2077 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 2078 CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32, 2079 /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(), 2080 UB.getAddress(), ST.getAddress()); 2081 // UB = min(UB, GlobalUB); 2082 auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); 2083 auto *MinUBGlobalUB = CGF.Builder.CreateSelect( 2084 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 2085 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 2086 // IV = LB; 2087 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); 2088 // while (idx <= UB) { BODY; ++idx; } 2089 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, 2090 [](CodeGenFunction &) {}); 2091 // Tell the runtime we are done. 2092 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart()); 2093 CGF.EmitOMPReductionClauseFinal(S); 2094 // Emit post-update of the reduction variables if IsLastIter != 0. 2095 emitPostUpdateForReductionClause( 2096 CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2097 return CGF.Builder.CreateIsNotNull( 2098 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2099 }); 2100 2101 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2102 if (HasLastprivates) 2103 CGF.EmitOMPLastprivateClauseFinal( 2104 S, CGF.Builder.CreateIsNotNull( 2105 CGF.EmitLoadOfScalar(IL, S.getLocStart()))); 2106 }; 2107 2108 bool HasCancel = false; 2109 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 2110 HasCancel = OSD->hasCancel(); 2111 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 2112 HasCancel = OPSD->hasCancel(); 2113 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 2114 HasCancel); 2115 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 2116 // clause. Otherwise the barrier will be generated by the codegen for the 2117 // directive. 2118 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 2119 // Emit implicit barrier to synchronize threads and avoid data races on 2120 // initialization of firstprivate variables. 2121 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2122 OMPD_unknown); 2123 } 2124 } 2125 2126 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 2127 { 2128 OMPLexicalScope Scope(*this, S); 2129 EmitSections(S); 2130 } 2131 // Emit an implicit barrier at the end. 2132 if (!S.getSingleClause<OMPNowaitClause>()) { 2133 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2134 OMPD_sections); 2135 } 2136 } 2137 2138 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 2139 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2140 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2141 }; 2142 OMPLexicalScope Scope(*this, S); 2143 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 2144 S.hasCancel()); 2145 } 2146 2147 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 2148 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 2149 llvm::SmallVector<const Expr *, 8> DestExprs; 2150 llvm::SmallVector<const Expr *, 8> SrcExprs; 2151 llvm::SmallVector<const Expr *, 8> AssignmentOps; 2152 // Check if there are any 'copyprivate' clauses associated with this 2153 // 'single' construct. 2154 // Build a list of copyprivate variables along with helper expressions 2155 // (<source>, <destination>, <destination>=<source> expressions) 2156 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 2157 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 2158 DestExprs.append(C->destination_exprs().begin(), 2159 C->destination_exprs().end()); 2160 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 2161 AssignmentOps.append(C->assignment_ops().begin(), 2162 C->assignment_ops().end()); 2163 } 2164 // Emit code for 'single' region along with 'copyprivate' clauses 2165 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2166 Action.Enter(CGF); 2167 OMPPrivateScope SingleScope(CGF); 2168 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 2169 CGF.EmitOMPPrivateClause(S, SingleScope); 2170 (void)SingleScope.Privatize(); 2171 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2172 }; 2173 { 2174 OMPLexicalScope Scope(*this, S); 2175 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 2176 CopyprivateVars, DestExprs, 2177 SrcExprs, AssignmentOps); 2178 } 2179 // Emit an implicit barrier at the end (to avoid data race on firstprivate 2180 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 2181 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 2182 CGM.getOpenMPRuntime().emitBarrierCall( 2183 *this, S.getLocStart(), 2184 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); 2185 } 2186 } 2187 2188 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 2189 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2190 Action.Enter(CGF); 2191 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2192 }; 2193 OMPLexicalScope Scope(*this, S); 2194 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 2195 } 2196 2197 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 2198 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2199 Action.Enter(CGF); 2200 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2201 }; 2202 Expr *Hint = nullptr; 2203 if (auto *HintClause = S.getSingleClause<OMPHintClause>()) 2204 Hint = HintClause->getHint(); 2205 OMPLexicalScope Scope(*this, S); 2206 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 2207 S.getDirectiveName().getAsString(), 2208 CodeGen, S.getLocStart(), Hint); 2209 } 2210 2211 void CodeGenFunction::EmitOMPParallelForDirective( 2212 const OMPParallelForDirective &S) { 2213 // Emit directive as a combined directive that consists of two implicit 2214 // directives: 'parallel' with 'for' directive. 2215 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2216 CGF.EmitOMPWorksharingLoop(S); 2217 }; 2218 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); 2219 } 2220 2221 void CodeGenFunction::EmitOMPParallelForSimdDirective( 2222 const OMPParallelForSimdDirective &S) { 2223 // Emit directive as a combined directive that consists of two implicit 2224 // directives: 'parallel' with 'for' directive. 2225 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2226 CGF.EmitOMPWorksharingLoop(S); 2227 }; 2228 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); 2229 } 2230 2231 void CodeGenFunction::EmitOMPParallelSectionsDirective( 2232 const OMPParallelSectionsDirective &S) { 2233 // Emit directive as a combined directive that consists of two implicit 2234 // directives: 'parallel' with 'sections' directive. 2235 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2236 CGF.EmitSections(S); 2237 }; 2238 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); 2239 } 2240 2241 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 2242 // Emit outlined function for task construct. 2243 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2244 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 2245 auto *I = CS->getCapturedDecl()->param_begin(); 2246 auto *PartId = std::next(I); 2247 // The first function argument for tasks is a thread id, the second one is a 2248 // part id (0 for tied tasks, >=0 for untied task). 2249 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 2250 // Get list of private variables. 2251 llvm::SmallVector<const Expr *, 8> PrivateVars; 2252 llvm::SmallVector<const Expr *, 8> PrivateCopies; 2253 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 2254 auto IRef = C->varlist_begin(); 2255 for (auto *IInit : C->private_copies()) { 2256 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2257 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2258 PrivateVars.push_back(*IRef); 2259 PrivateCopies.push_back(IInit); 2260 } 2261 ++IRef; 2262 } 2263 } 2264 EmittedAsPrivate.clear(); 2265 // Get list of firstprivate variables. 2266 llvm::SmallVector<const Expr *, 8> FirstprivateVars; 2267 llvm::SmallVector<const Expr *, 8> FirstprivateCopies; 2268 llvm::SmallVector<const Expr *, 8> FirstprivateInits; 2269 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 2270 auto IRef = C->varlist_begin(); 2271 auto IElemInitRef = C->inits().begin(); 2272 for (auto *IInit : C->private_copies()) { 2273 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2274 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2275 FirstprivateVars.push_back(*IRef); 2276 FirstprivateCopies.push_back(IInit); 2277 FirstprivateInits.push_back(*IElemInitRef); 2278 } 2279 ++IRef; 2280 ++IElemInitRef; 2281 } 2282 } 2283 // Build list of dependences. 2284 llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8> 2285 Dependences; 2286 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { 2287 for (auto *IRef : C->varlists()) { 2288 Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); 2289 } 2290 } 2291 auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars]( 2292 CodeGenFunction &CGF, PrePostActionTy &) { 2293 // Set proper addresses for generated private copies. 2294 auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2295 { 2296 OMPPrivateScope Scope(CGF); 2297 if (!PrivateVars.empty() || !FirstprivateVars.empty()) { 2298 auto *CopyFn = CGF.Builder.CreateLoad( 2299 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); 2300 auto *PrivatesPtr = CGF.Builder.CreateLoad( 2301 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); 2302 // Map privates. 2303 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 2304 llvm::SmallVector<llvm::Value *, 16> CallArgs; 2305 CallArgs.push_back(PrivatesPtr); 2306 for (auto *E : PrivateVars) { 2307 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2308 Address PrivatePtr = 2309 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); 2310 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2311 CallArgs.push_back(PrivatePtr.getPointer()); 2312 } 2313 for (auto *E : FirstprivateVars) { 2314 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2315 Address PrivatePtr = 2316 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); 2317 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2318 CallArgs.push_back(PrivatePtr.getPointer()); 2319 } 2320 CGF.EmitRuntimeCall(CopyFn, CallArgs); 2321 for (auto &&Pair : PrivatePtrs) { 2322 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 2323 CGF.getContext().getDeclAlign(Pair.first)); 2324 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 2325 } 2326 } 2327 (void)Scope.Privatize(); 2328 if (*PartId) { 2329 // TODO: emit code for untied tasks. 2330 } 2331 CGF.EmitStmt(CS->getCapturedStmt()); 2332 } 2333 }; 2334 auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 2335 S, *I, OMPD_task, CodeGen); 2336 // Check if we should emit tied or untied task. 2337 bool Tied = !S.getSingleClause<OMPUntiedClause>(); 2338 // Check if the task is final 2339 llvm::PointerIntPair<llvm::Value *, 1, bool> Final; 2340 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 2341 // If the condition constant folds and can be elided, try to avoid emitting 2342 // the condition and the dead arm of the if/else. 2343 auto *Cond = Clause->getCondition(); 2344 bool CondConstant; 2345 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 2346 Final.setInt(CondConstant); 2347 else 2348 Final.setPointer(EvaluateExprAsBool(Cond)); 2349 } else { 2350 // By default the task is not final. 2351 Final.setInt(/*IntVal=*/false); 2352 } 2353 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 2354 const Expr *IfCond = nullptr; 2355 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2356 if (C->getNameModifier() == OMPD_unknown || 2357 C->getNameModifier() == OMPD_task) { 2358 IfCond = C->getCondition(); 2359 break; 2360 } 2361 } 2362 OMPLexicalScope Scope(*this, S); 2363 CGM.getOpenMPRuntime().emitTaskCall( 2364 *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy, 2365 CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars, 2366 FirstprivateCopies, FirstprivateInits, Dependences); 2367 } 2368 2369 void CodeGenFunction::EmitOMPTaskyieldDirective( 2370 const OMPTaskyieldDirective &S) { 2371 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); 2372 } 2373 2374 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 2375 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); 2376 } 2377 2378 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 2379 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart()); 2380 } 2381 2382 void CodeGenFunction::EmitOMPTaskgroupDirective( 2383 const OMPTaskgroupDirective &S) { 2384 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2385 Action.Enter(CGF); 2386 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2387 }; 2388 OMPLexicalScope Scope(*this, S); 2389 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); 2390 } 2391 2392 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 2393 CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { 2394 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) { 2395 return llvm::makeArrayRef(FlushClause->varlist_begin(), 2396 FlushClause->varlist_end()); 2397 } 2398 return llvm::None; 2399 }(), S.getLocStart()); 2400 } 2401 2402 void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { 2403 // Emit the loop iteration variable. 2404 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2405 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2406 EmitVarDecl(*IVDecl); 2407 2408 // Emit the iterations count variable. 2409 // If it is not a variable, Sema decided to calculate iterations count on each 2410 // iteration (e.g., it is foldable into a constant). 2411 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2412 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2413 // Emit calculation of the iterations count. 2414 EmitIgnoredExpr(S.getCalcLastIteration()); 2415 } 2416 2417 auto &RT = CGM.getOpenMPRuntime(); 2418 2419 // Check pre-condition. 2420 { 2421 OMPLoopScope PreInitScope(*this, S); 2422 // Skip the entire loop if we don't meet the precondition. 2423 // If the condition constant folds and can be elided, avoid emitting the 2424 // whole loop. 2425 bool CondConstant; 2426 llvm::BasicBlock *ContBlock = nullptr; 2427 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2428 if (!CondConstant) 2429 return; 2430 } else { 2431 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2432 ContBlock = createBasicBlock("omp.precond.end"); 2433 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2434 getProfileCount(&S)); 2435 EmitBlock(ThenBlock); 2436 incrementProfileCounter(&S); 2437 } 2438 2439 // Emit 'then' code. 2440 { 2441 // Emit helper vars inits. 2442 LValue LB = 2443 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); 2444 LValue UB = 2445 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); 2446 LValue ST = 2447 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2448 LValue IL = 2449 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2450 2451 OMPPrivateScope LoopScope(*this); 2452 emitPrivateLoopCounters(*this, LoopScope, S.counters(), 2453 S.private_counters()); 2454 (void)LoopScope.Privatize(); 2455 2456 // Detect the distribute schedule kind and chunk. 2457 llvm::Value *Chunk = nullptr; 2458 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; 2459 if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) { 2460 ScheduleKind = C->getDistScheduleKind(); 2461 if (const auto *Ch = C->getChunkSize()) { 2462 Chunk = EmitScalarExpr(Ch); 2463 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2464 S.getIterationVariable()->getType(), 2465 S.getLocStart()); 2466 } 2467 } 2468 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2469 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2470 2471 // OpenMP [2.10.8, distribute Construct, Description] 2472 // If dist_schedule is specified, kind must be static. If specified, 2473 // iterations are divided into chunks of size chunk_size, chunks are 2474 // assigned to the teams of the league in a round-robin fashion in the 2475 // order of the team number. When no chunk_size is specified, the 2476 // iteration space is divided into chunks that are approximately equal 2477 // in size, and at most one chunk is distributed to each team of the 2478 // league. The size of the chunks is unspecified in this case. 2479 if (RT.isStaticNonchunked(ScheduleKind, 2480 /* Chunked */ Chunk != nullptr)) { 2481 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, 2482 IVSize, IVSigned, /* Ordered = */ false, 2483 IL.getAddress(), LB.getAddress(), 2484 UB.getAddress(), ST.getAddress()); 2485 auto LoopExit = 2486 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2487 // UB = min(UB, GlobalUB); 2488 EmitIgnoredExpr(S.getEnsureUpperBound()); 2489 // IV = LB; 2490 EmitIgnoredExpr(S.getInit()); 2491 // while (idx <= UB) { BODY; ++idx; } 2492 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 2493 S.getInc(), 2494 [&S, LoopExit](CodeGenFunction &CGF) { 2495 CGF.EmitOMPLoopBody(S, LoopExit); 2496 CGF.EmitStopPoint(&S); 2497 }, 2498 [](CodeGenFunction &) {}); 2499 EmitBlock(LoopExit.getBlock()); 2500 // Tell the runtime we are done. 2501 RT.emitForStaticFinish(*this, S.getLocStart()); 2502 } else { 2503 // Emit the outer loop, which requests its work chunk [LB..UB] from 2504 // runtime and runs the inner loop to process it. 2505 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, 2506 LB.getAddress(), UB.getAddress(), ST.getAddress(), 2507 IL.getAddress(), Chunk); 2508 } 2509 } 2510 2511 // We're now done with the loop, so jump to the continuation block. 2512 if (ContBlock) { 2513 EmitBranch(ContBlock); 2514 EmitBlock(ContBlock, true); 2515 } 2516 } 2517 } 2518 2519 void CodeGenFunction::EmitOMPDistributeDirective( 2520 const OMPDistributeDirective &S) { 2521 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2522 CGF.EmitOMPDistributeLoop(S); 2523 }; 2524 OMPLexicalScope Scope(*this, S); 2525 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 2526 false); 2527 } 2528 2529 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 2530 const CapturedStmt *S) { 2531 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 2532 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 2533 CGF.CapturedStmtInfo = &CapStmtInfo; 2534 auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); 2535 Fn->addFnAttr(llvm::Attribute::NoInline); 2536 return Fn; 2537 } 2538 2539 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 2540 if (!S.getAssociatedStmt()) 2541 return; 2542 auto *C = S.getSingleClause<OMPSIMDClause>(); 2543 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, 2544 PrePostActionTy &Action) { 2545 if (C) { 2546 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2547 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 2548 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 2549 auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); 2550 CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); 2551 } else { 2552 Action.Enter(CGF); 2553 CGF.EmitStmt( 2554 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2555 } 2556 }; 2557 OMPLexicalScope Scope(*this, S); 2558 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); 2559 } 2560 2561 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 2562 QualType SrcType, QualType DestType, 2563 SourceLocation Loc) { 2564 assert(CGF.hasScalarEvaluationKind(DestType) && 2565 "DestType must have scalar evaluation kind."); 2566 assert(!Val.isAggregate() && "Must be a scalar or complex."); 2567 return Val.isScalar() 2568 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, 2569 Loc) 2570 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 2571 DestType, Loc); 2572 } 2573 2574 static CodeGenFunction::ComplexPairTy 2575 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 2576 QualType DestType, SourceLocation Loc) { 2577 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 2578 "DestType must have complex evaluation kind."); 2579 CodeGenFunction::ComplexPairTy ComplexVal; 2580 if (Val.isScalar()) { 2581 // Convert the input element to the element type of the complex. 2582 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 2583 auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 2584 DestElementType, Loc); 2585 ComplexVal = CodeGenFunction::ComplexPairTy( 2586 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 2587 } else { 2588 assert(Val.isComplex() && "Must be a scalar or complex."); 2589 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 2590 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 2591 ComplexVal.first = CGF.EmitScalarConversion( 2592 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 2593 ComplexVal.second = CGF.EmitScalarConversion( 2594 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 2595 } 2596 return ComplexVal; 2597 } 2598 2599 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 2600 LValue LVal, RValue RVal) { 2601 if (LVal.isGlobalReg()) { 2602 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 2603 } else { 2604 CGF.EmitAtomicStore(RVal, LVal, 2605 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 2606 : llvm::AtomicOrdering::Monotonic, 2607 LVal.isVolatile(), /*IsInit=*/false); 2608 } 2609 } 2610 2611 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, 2612 QualType RValTy, SourceLocation Loc) { 2613 switch (getEvaluationKind(LVal.getType())) { 2614 case TEK_Scalar: 2615 EmitStoreThroughLValue(RValue::get(convertToScalarValue( 2616 *this, RVal, RValTy, LVal.getType(), Loc)), 2617 LVal); 2618 break; 2619 case TEK_Complex: 2620 EmitStoreOfComplex( 2621 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, 2622 /*isInit=*/false); 2623 break; 2624 case TEK_Aggregate: 2625 llvm_unreachable("Must be a scalar or complex."); 2626 } 2627 } 2628 2629 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 2630 const Expr *X, const Expr *V, 2631 SourceLocation Loc) { 2632 // v = x; 2633 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 2634 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 2635 LValue XLValue = CGF.EmitLValue(X); 2636 LValue VLValue = CGF.EmitLValue(V); 2637 RValue Res = XLValue.isGlobalReg() 2638 ? CGF.EmitLoadOfLValue(XLValue, Loc) 2639 : CGF.EmitAtomicLoad( 2640 XLValue, Loc, 2641 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 2642 : llvm::AtomicOrdering::Monotonic, 2643 XLValue.isVolatile()); 2644 // OpenMP, 2.12.6, atomic Construct 2645 // Any atomic construct with a seq_cst clause forces the atomically 2646 // performed operation to include an implicit flush operation without a 2647 // list. 2648 if (IsSeqCst) 2649 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2650 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); 2651 } 2652 2653 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 2654 const Expr *X, const Expr *E, 2655 SourceLocation Loc) { 2656 // x = expr; 2657 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 2658 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); 2659 // OpenMP, 2.12.6, atomic Construct 2660 // Any atomic construct with a seq_cst clause forces the atomically 2661 // performed operation to include an implicit flush operation without a 2662 // list. 2663 if (IsSeqCst) 2664 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2665 } 2666 2667 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, 2668 RValue Update, 2669 BinaryOperatorKind BO, 2670 llvm::AtomicOrdering AO, 2671 bool IsXLHSInRHSPart) { 2672 auto &Context = CGF.CGM.getContext(); 2673 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' 2674 // expression is simple and atomic is allowed for the given type for the 2675 // target platform. 2676 if (BO == BO_Comma || !Update.isScalar() || 2677 !Update.getScalarVal()->getType()->isIntegerTy() || 2678 !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) && 2679 (Update.getScalarVal()->getType() != 2680 X.getAddress().getElementType())) || 2681 !X.getAddress().getElementType()->isIntegerTy() || 2682 !Context.getTargetInfo().hasBuiltinAtomic( 2683 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) 2684 return std::make_pair(false, RValue::get(nullptr)); 2685 2686 llvm::AtomicRMWInst::BinOp RMWOp; 2687 switch (BO) { 2688 case BO_Add: 2689 RMWOp = llvm::AtomicRMWInst::Add; 2690 break; 2691 case BO_Sub: 2692 if (!IsXLHSInRHSPart) 2693 return std::make_pair(false, RValue::get(nullptr)); 2694 RMWOp = llvm::AtomicRMWInst::Sub; 2695 break; 2696 case BO_And: 2697 RMWOp = llvm::AtomicRMWInst::And; 2698 break; 2699 case BO_Or: 2700 RMWOp = llvm::AtomicRMWInst::Or; 2701 break; 2702 case BO_Xor: 2703 RMWOp = llvm::AtomicRMWInst::Xor; 2704 break; 2705 case BO_LT: 2706 RMWOp = X.getType()->hasSignedIntegerRepresentation() 2707 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min 2708 : llvm::AtomicRMWInst::Max) 2709 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin 2710 : llvm::AtomicRMWInst::UMax); 2711 break; 2712 case BO_GT: 2713 RMWOp = X.getType()->hasSignedIntegerRepresentation() 2714 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max 2715 : llvm::AtomicRMWInst::Min) 2716 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax 2717 : llvm::AtomicRMWInst::UMin); 2718 break; 2719 case BO_Assign: 2720 RMWOp = llvm::AtomicRMWInst::Xchg; 2721 break; 2722 case BO_Mul: 2723 case BO_Div: 2724 case BO_Rem: 2725 case BO_Shl: 2726 case BO_Shr: 2727 case BO_LAnd: 2728 case BO_LOr: 2729 return std::make_pair(false, RValue::get(nullptr)); 2730 case BO_PtrMemD: 2731 case BO_PtrMemI: 2732 case BO_LE: 2733 case BO_GE: 2734 case BO_EQ: 2735 case BO_NE: 2736 case BO_AddAssign: 2737 case BO_SubAssign: 2738 case BO_AndAssign: 2739 case BO_OrAssign: 2740 case BO_XorAssign: 2741 case BO_MulAssign: 2742 case BO_DivAssign: 2743 case BO_RemAssign: 2744 case BO_ShlAssign: 2745 case BO_ShrAssign: 2746 case BO_Comma: 2747 llvm_unreachable("Unsupported atomic update operation"); 2748 } 2749 auto *UpdateVal = Update.getScalarVal(); 2750 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) { 2751 UpdateVal = CGF.Builder.CreateIntCast( 2752 IC, X.getAddress().getElementType(), 2753 X.getType()->hasSignedIntegerRepresentation()); 2754 } 2755 auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO); 2756 return std::make_pair(true, RValue::get(Res)); 2757 } 2758 2759 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( 2760 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, 2761 llvm::AtomicOrdering AO, SourceLocation Loc, 2762 const llvm::function_ref<RValue(RValue)> &CommonGen) { 2763 // Update expressions are allowed to have the following forms: 2764 // x binop= expr; -> xrval + expr; 2765 // x++, ++x -> xrval + 1; 2766 // x--, --x -> xrval - 1; 2767 // x = x binop expr; -> xrval binop expr 2768 // x = expr Op x; - > expr binop xrval; 2769 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart); 2770 if (!Res.first) { 2771 if (X.isGlobalReg()) { 2772 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop 2773 // 'xrval'. 2774 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X); 2775 } else { 2776 // Perform compare-and-swap procedure. 2777 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified()); 2778 } 2779 } 2780 return Res; 2781 } 2782 2783 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, 2784 const Expr *X, const Expr *E, 2785 const Expr *UE, bool IsXLHSInRHSPart, 2786 SourceLocation Loc) { 2787 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 2788 "Update expr in 'atomic update' must be a binary operator."); 2789 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 2790 // Update expressions are allowed to have the following forms: 2791 // x binop= expr; -> xrval + expr; 2792 // x++, ++x -> xrval + 1; 2793 // x--, --x -> xrval - 1; 2794 // x = x binop expr; -> xrval binop expr 2795 // x = expr Op x; - > expr binop xrval; 2796 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); 2797 LValue XLValue = CGF.EmitLValue(X); 2798 RValue ExprRValue = CGF.EmitAnyExpr(E); 2799 auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 2800 : llvm::AtomicOrdering::Monotonic; 2801 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 2802 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 2803 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 2804 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 2805 auto Gen = 2806 [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue { 2807 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 2808 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 2809 return CGF.EmitAnyExpr(UE); 2810 }; 2811 (void)CGF.EmitOMPAtomicSimpleUpdateExpr( 2812 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 2813 // OpenMP, 2.12.6, atomic Construct 2814 // Any atomic construct with a seq_cst clause forces the atomically 2815 // performed operation to include an implicit flush operation without a 2816 // list. 2817 if (IsSeqCst) 2818 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2819 } 2820 2821 static RValue convertToType(CodeGenFunction &CGF, RValue Value, 2822 QualType SourceType, QualType ResType, 2823 SourceLocation Loc) { 2824 switch (CGF.getEvaluationKind(ResType)) { 2825 case TEK_Scalar: 2826 return RValue::get( 2827 convertToScalarValue(CGF, Value, SourceType, ResType, Loc)); 2828 case TEK_Complex: { 2829 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc); 2830 return RValue::getComplex(Res.first, Res.second); 2831 } 2832 case TEK_Aggregate: 2833 break; 2834 } 2835 llvm_unreachable("Must be a scalar or complex."); 2836 } 2837 2838 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, 2839 bool IsPostfixUpdate, const Expr *V, 2840 const Expr *X, const Expr *E, 2841 const Expr *UE, bool IsXLHSInRHSPart, 2842 SourceLocation Loc) { 2843 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue"); 2844 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue"); 2845 RValue NewVVal; 2846 LValue VLValue = CGF.EmitLValue(V); 2847 LValue XLValue = CGF.EmitLValue(X); 2848 RValue ExprRValue = CGF.EmitAnyExpr(E); 2849 auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 2850 : llvm::AtomicOrdering::Monotonic; 2851 QualType NewVValType; 2852 if (UE) { 2853 // 'x' is updated with some additional value. 2854 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 2855 "Update expr in 'atomic capture' must be a binary operator."); 2856 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 2857 // Update expressions are allowed to have the following forms: 2858 // x binop= expr; -> xrval + expr; 2859 // x++, ++x -> xrval + 1; 2860 // x--, --x -> xrval - 1; 2861 // x = x binop expr; -> xrval binop expr 2862 // x = expr Op x; - > expr binop xrval; 2863 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 2864 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 2865 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 2866 NewVValType = XRValExpr->getType(); 2867 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 2868 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, 2869 IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue { 2870 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 2871 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 2872 RValue Res = CGF.EmitAnyExpr(UE); 2873 NewVVal = IsPostfixUpdate ? XRValue : Res; 2874 return Res; 2875 }; 2876 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 2877 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 2878 if (Res.first) { 2879 // 'atomicrmw' instruction was generated. 2880 if (IsPostfixUpdate) { 2881 // Use old value from 'atomicrmw'. 2882 NewVVal = Res.second; 2883 } else { 2884 // 'atomicrmw' does not provide new value, so evaluate it using old 2885 // value of 'x'. 2886 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 2887 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 2888 NewVVal = CGF.EmitAnyExpr(UE); 2889 } 2890 } 2891 } else { 2892 // 'x' is simply rewritten with some 'expr'. 2893 NewVValType = X->getType().getNonReferenceType(); 2894 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 2895 X->getType().getNonReferenceType(), Loc); 2896 auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue { 2897 NewVVal = XRValue; 2898 return ExprRValue; 2899 }; 2900 // Try to perform atomicrmw xchg, otherwise simple exchange. 2901 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 2902 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 2903 Loc, Gen); 2904 if (Res.first) { 2905 // 'atomicrmw' instruction was generated. 2906 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 2907 } 2908 } 2909 // Emit post-update store to 'v' of old/new 'x' value. 2910 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); 2911 // OpenMP, 2.12.6, atomic Construct 2912 // Any atomic construct with a seq_cst clause forces the atomically 2913 // performed operation to include an implicit flush operation without a 2914 // list. 2915 if (IsSeqCst) 2916 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2917 } 2918 2919 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 2920 bool IsSeqCst, bool IsPostfixUpdate, 2921 const Expr *X, const Expr *V, const Expr *E, 2922 const Expr *UE, bool IsXLHSInRHSPart, 2923 SourceLocation Loc) { 2924 switch (Kind) { 2925 case OMPC_read: 2926 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 2927 break; 2928 case OMPC_write: 2929 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 2930 break; 2931 case OMPC_unknown: 2932 case OMPC_update: 2933 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 2934 break; 2935 case OMPC_capture: 2936 EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 2937 IsXLHSInRHSPart, Loc); 2938 break; 2939 case OMPC_if: 2940 case OMPC_final: 2941 case OMPC_num_threads: 2942 case OMPC_private: 2943 case OMPC_firstprivate: 2944 case OMPC_lastprivate: 2945 case OMPC_reduction: 2946 case OMPC_safelen: 2947 case OMPC_simdlen: 2948 case OMPC_collapse: 2949 case OMPC_default: 2950 case OMPC_seq_cst: 2951 case OMPC_shared: 2952 case OMPC_linear: 2953 case OMPC_aligned: 2954 case OMPC_copyin: 2955 case OMPC_copyprivate: 2956 case OMPC_flush: 2957 case OMPC_proc_bind: 2958 case OMPC_schedule: 2959 case OMPC_ordered: 2960 case OMPC_nowait: 2961 case OMPC_untied: 2962 case OMPC_threadprivate: 2963 case OMPC_depend: 2964 case OMPC_mergeable: 2965 case OMPC_device: 2966 case OMPC_threads: 2967 case OMPC_simd: 2968 case OMPC_map: 2969 case OMPC_num_teams: 2970 case OMPC_thread_limit: 2971 case OMPC_priority: 2972 case OMPC_grainsize: 2973 case OMPC_nogroup: 2974 case OMPC_num_tasks: 2975 case OMPC_hint: 2976 case OMPC_dist_schedule: 2977 case OMPC_defaultmap: 2978 case OMPC_uniform: 2979 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 2980 } 2981 } 2982 2983 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 2984 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); 2985 OpenMPClauseKind Kind = OMPC_unknown; 2986 for (auto *C : S.clauses()) { 2987 // Find first clause (skip seq_cst clause, if it is first). 2988 if (C->getClauseKind() != OMPC_seq_cst) { 2989 Kind = C->getClauseKind(); 2990 break; 2991 } 2992 } 2993 2994 const auto *CS = 2995 S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 2996 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) { 2997 enterFullExpression(EWC); 2998 } 2999 // Processing for statements under 'atomic capture'. 3000 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 3001 for (const auto *C : Compound->body()) { 3002 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) { 3003 enterFullExpression(EWC); 3004 } 3005 } 3006 } 3007 3008 auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, 3009 PrePostActionTy &) { 3010 CGF.EmitStopPoint(CS); 3011 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), 3012 S.getV(), S.getExpr(), S.getUpdateExpr(), 3013 S.isXLHSInRHSPart(), S.getLocStart()); 3014 }; 3015 OMPLexicalScope Scope(*this, S); 3016 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); 3017 } 3018 3019 std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/> 3020 CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction( 3021 CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName, 3022 bool IsOffloadEntry) { 3023 llvm::Function *OutlinedFn = nullptr; 3024 llvm::Constant *OutlinedFnID = nullptr; 3025 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3026 OMPPrivateScope PrivateScope(CGF); 3027 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3028 CGF.EmitOMPPrivateClause(S, PrivateScope); 3029 (void)PrivateScope.Privatize(); 3030 3031 Action.Enter(CGF); 3032 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3033 }; 3034 // Emit target region as a standalone region. 3035 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3036 S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); 3037 return std::make_pair(OutlinedFn, OutlinedFnID); 3038 } 3039 3040 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 3041 const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); 3042 3043 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3044 GenerateOpenMPCapturedVars(CS, CapturedVars); 3045 3046 llvm::Function *Fn = nullptr; 3047 llvm::Constant *FnID = nullptr; 3048 3049 // Check if we have any if clause associated with the directive. 3050 const Expr *IfCond = nullptr; 3051 3052 if (auto *C = S.getSingleClause<OMPIfClause>()) { 3053 IfCond = C->getCondition(); 3054 } 3055 3056 // Check if we have any device clause associated with the directive. 3057 const Expr *Device = nullptr; 3058 if (auto *C = S.getSingleClause<OMPDeviceClause>()) { 3059 Device = C->getDevice(); 3060 } 3061 3062 // Check if we have an if clause whose conditional always evaluates to false 3063 // or if we do not have any targets specified. If so the target region is not 3064 // an offload entry point. 3065 bool IsOffloadEntry = true; 3066 if (IfCond) { 3067 bool Val; 3068 if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) 3069 IsOffloadEntry = false; 3070 } 3071 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3072 IsOffloadEntry = false; 3073 3074 assert(CurFuncDecl && "No parent declaration for target region!"); 3075 StringRef ParentName; 3076 // In case we have Ctors/Dtors we use the complete type variant to produce 3077 // the mangling of the device outlined kernel. 3078 if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl)) 3079 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); 3080 else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl)) 3081 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); 3082 else 3083 ParentName = 3084 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl))); 3085 3086 std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction( 3087 CGM, S, ParentName, IsOffloadEntry); 3088 OMPLexicalScope Scope(*this, S); 3089 CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device, 3090 CapturedVars); 3091 } 3092 3093 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, 3094 const OMPExecutableDirective &S, 3095 OpenMPDirectiveKind InnermostKind, 3096 const RegionCodeGenTy &CodeGen) { 3097 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 3098 auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). 3099 emitParallelOrTeamsOutlinedFunction(S, 3100 *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 3101 3102 const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S); 3103 const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>(); 3104 const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>(); 3105 if (NT || TL) { 3106 Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; 3107 Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; 3108 3109 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, 3110 S.getLocStart()); 3111 } 3112 3113 OMPLexicalScope Scope(CGF, S); 3114 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3115 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3116 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, 3117 CapturedVars); 3118 } 3119 3120 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { 3121 // Emit parallel region as a standalone region. 3122 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3123 OMPPrivateScope PrivateScope(CGF); 3124 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3125 CGF.EmitOMPPrivateClause(S, PrivateScope); 3126 (void)PrivateScope.Privatize(); 3127 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3128 }; 3129 emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); 3130 } 3131 3132 void CodeGenFunction::EmitOMPCancellationPointDirective( 3133 const OMPCancellationPointDirective &S) { 3134 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), 3135 S.getCancelRegion()); 3136 } 3137 3138 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 3139 const Expr *IfCond = nullptr; 3140 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3141 if (C->getNameModifier() == OMPD_unknown || 3142 C->getNameModifier() == OMPD_cancel) { 3143 IfCond = C->getCondition(); 3144 break; 3145 } 3146 } 3147 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, 3148 S.getCancelRegion()); 3149 } 3150 3151 CodeGenFunction::JumpDest 3152 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 3153 if (Kind == OMPD_parallel || Kind == OMPD_task) 3154 return ReturnBlock; 3155 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 3156 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for); 3157 return BreakContinueStack.back().BreakBlock; 3158 } 3159 3160 // Generate the instructions for '#pragma omp target data' directive. 3161 void CodeGenFunction::EmitOMPTargetDataDirective( 3162 const OMPTargetDataDirective &S) { 3163 // emit the code inside the construct for now 3164 OMPLexicalScope Scope(*this, S); 3165 CGM.getOpenMPRuntime().emitInlinedDirective( 3166 *this, OMPD_target_data, [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3167 CGF.EmitStmt( 3168 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3169 }); 3170 } 3171 3172 void CodeGenFunction::EmitOMPTargetEnterDataDirective( 3173 const OMPTargetEnterDataDirective &S) { 3174 // TODO: codegen for target enter data. 3175 } 3176 3177 void CodeGenFunction::EmitOMPTargetExitDataDirective( 3178 const OMPTargetExitDataDirective &S) { 3179 // TODO: codegen for target exit data. 3180 } 3181 3182 void CodeGenFunction::EmitOMPTargetParallelDirective( 3183 const OMPTargetParallelDirective &S) { 3184 // TODO: codegen for target parallel. 3185 } 3186 3187 void CodeGenFunction::EmitOMPTargetParallelForDirective( 3188 const OMPTargetParallelForDirective &S) { 3189 // TODO: codegen for target parallel for. 3190 } 3191 3192 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 3193 // emit the code inside the construct for now 3194 OMPLexicalScope Scope(*this, S); 3195 CGM.getOpenMPRuntime().emitInlinedDirective( 3196 *this, OMPD_taskloop, [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3197 OMPLoopScope PreInitScope(CGF, S); 3198 CGF.EmitStmt( 3199 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3200 }); 3201 } 3202 3203 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 3204 const OMPTaskLoopSimdDirective &S) { 3205 // emit the code inside the construct for now 3206 OMPLexicalScope Scope(*this, S); 3207 CGM.getOpenMPRuntime().emitInlinedDirective( 3208 *this, OMPD_taskloop_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3209 OMPLoopScope PreInitScope(CGF, S); 3210 CGF.EmitStmt( 3211 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3212 }); 3213 } 3214