//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
using namespace clang;
using namespace CodeGen;

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy())
      CapturedVars.push_back(
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal());
    else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), SourceLocation());
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references, we need to return the address of the
  // reference instead of the reference to the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitScalarInit(RefVal, TmpLVal);
  }

  return TmpAddr;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  const RecordDecl *RD = S.getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  // Build the argument list.
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList Args;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the
    // outlined function argument type should be uintptr and the value
    // properly cast to uintptr.
    // This is necessary because the runtime
    // library only handles pointers. We pass the VLA type sizes to the
    // outlined function in the same way.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType())
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &getContext().Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &getContext().Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getContext().getVariableArrayDecayedType(ArgType);
    Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
                                             FD->getLocation(), II, ArgType));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo,
                                                    /*IsVariadic=*/false);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F = llvm::Function::Create(
      FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
      CapturedStmtInfo->getHelperName(), &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // Generate the function.
  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt]));
      ++Cnt, ++I;
      continue;
    }

    LValue ArgLVal =
        MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
                       AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      LValue CastedArgLVal =
          MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
                                              Args[Cnt]->getName(), ArgLVal),
                         FD->getType(), AlignmentSource::Decl);
      auto *ExprArg =
          EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizeMap[VAT->getSizeExpr()] = ExprArg;
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        ArgAddr = EmitLoadOfReference(
            ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
      }
      setAddrOfLocalVar(
          Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      setAddrOfLocalVar(I->getCapturedVar(),
                        castValueFromUintptr(*this, FD->getType(),
                                             Args[Cnt]->getName(), ArgLVal,
                                             VarTy->isReferenceType()));
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue =
          EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
    }
    ++Cnt, ++I;
  }

  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());

  return F;
}

//===----------------------------------------------------------------------===//
//                         OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// \brief Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, const Expr *Init) {
  // Perform element-by-element initialization.
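  // A sketch of the emitted control flow (an illustration, not the exact IR):
  //   if (DestBegin == DestEnd) goto done;
  // body:
  //   cur = phi [ DestBegin, entry ], [ next, body ]
  //   <emit Init into the element at cur>
  //   next = cur + 1
  //   if (next == DestEnd) goto done; else goto body;
  // done: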
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());

  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                         /*IsInitializer=*/false);
  }

  // Shift the address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types, perform element-by-element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
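    // 'Copy' is the assignment expression built by Sema over the pseudo
    // source and destination variables; with the remapping above, it reads
    // from SrcAddr and writes to DestAddr.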
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsFirstprivate.count(OrigVD) == 0) {
        EmittedAsFirstprivate.insert(OrigVD);
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = OrigVD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef, ++InitsRef;
    }
  }
  return !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  //   threadprivate_var1 = master_threadprivate_var1;
  //   operator=(threadprivate_var2, master_threadprivate_var2);
  //   ...
  //   __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code
        // with TLS support, the address is passed from the master as a field
        // in the captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread; if
          // it is, no copying is needed.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of the copying procedure for the non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the
      // end of the loop.
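      // For example, given
      //   #pragma omp for lastprivate(a)
      // the private 'a' from the sequentially last iteration is copied back
      // to the original 'a' after the loop.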
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (IInit) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [&]() -> Address {
                // Emit private VarDecl with copy init.
                EmitDecl(*VD);
                return GetAddrOfLocalVar(VD);
              });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef, ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates;
  const Expr *LastIterVal = nullptr;
  const Expr *IVExpr = nullptr;
  const Expr *IncExpr = nullptr;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    if (isOpenMPWorksharingDirective(D.getDirectiveKind())) {
      LastIterVal = cast<VarDecl>(cast<DeclRefExpr>(
                                      LoopDirective->getUpperBoundVariable())
                                      ->getDecl())
                        ->getAnyInitializer();
      IVExpr = LoopDirective->getIterationVariable();
      IncExpr = LoopDirective->getInc();
      auto IUpdate = LoopDirective->updates().begin();
      for (auto *E : LoopDirective->counters()) {
        auto *D = cast<DeclRefExpr>(E)->getDecl()->getCanonicalDecl();
        LoopCountersAndUpdates[D] = *IUpdate;
        ++IUpdate;
      }
    }
  }
  {
    llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
    bool FirstLCV = true;
    for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
      auto IRef = C->varlist_begin();
      auto ISrcRef = C->source_exprs().begin();
      auto IDestRef = C->destination_exprs().begin();
      for (auto *AssignOp : C->assignment_ops()) {
        auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        auto *CanonicalVD = PrivateVD->getCanonicalDecl();
        if (AlreadyEmittedVars.insert(CanonicalVD).second) {
          // If the lastprivate variable is a loop control variable of a
          // loop-based directive, update its value before copying it back to
          // the original variable.
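          // E.g., for '#pragma omp for lastprivate(i)' where 'i' is also the
          // loop counter, 'i' must first be advanced to the value it would
          // hold after the final iteration.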
          if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) {
            if (FirstLCV && LastIterVal) {
              EmitAnyExprToMem(LastIterVal, EmitLValue(IVExpr).getAddress(),
                               IVExpr->getType().getQualifiers(),
                               /*IsInitializer=*/false);
              EmitIgnoredExpr(IncExpr);
              FirstLCV = false;
            }
            EmitIgnoredExpr(UpExpr);
          }
          auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
          auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
          // Get the address of the original variable.
          Address OriginalAddr = GetAddrOfLocalVar(DestVD);
          // Get the address of the private variable.
          Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
          if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
            PrivateAddr =
                Address(Builder.CreateLoad(PrivateAddr),
                        getNaturalTypeAlignment(RefTy->getPointeeType()));
          EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
        }
        ++IRef;
        ++ISrcRef;
        ++IDestRef;
      }
    }
  }
  if (IsLastIterCond) {
    EmitBlock(DoneBB, /*IsFinished=*/true);
  }
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    auto IPriv = C->privates().begin();
    for (auto IRef : C->varlists()) {
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
          Base = TempOASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
        auto OASELValueUB =
            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
        auto OriginalBaseLValue = EmitLValue(DE);
        auto BaseLValue = OriginalBaseLValue;
        auto *Zero = Builder.getInt64(/*C=*/0);
        llvm::SmallVector<llvm::Value *, 4> Indexes;
        Indexes.push_back(Zero);
        auto *ItemTy =
            OASELValueLB.getPointer()->getType()->getPointerElementType();
        auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType();
        while (Ty != ItemTy) {
          Indexes.push_back(Zero);
          Ty = Ty->getPointerElementType();
        }
        BaseLValue = MakeAddrLValue(
            Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes),
                    OASELValueLB.getAlignment()),
            OASELValueLB.getType(), OASELValueLB.getAlignmentSource());
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
          return OASELValueLB.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, PrivateVD, BaseLValue, OASELValueLB, OASELValueUB,
                     OriginalBaseLValue]() -> Address {
              // Emit VarDecl with copy init for arrays.
              // Get the address of the original variable captured in current
              // captured region.
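              // The private copy must cover the whole section [LB, UB], so
              // its size is computed below as (UB - LB) + 1 elements.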
              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
                                                 OASELValueLB.getPointer());
              Size = Builder.CreateNUWAdd(
                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(Size));
              EmitVariablyModifiedType(PrivateVD->getType());
              auto Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              auto *Init = PrivateVD->getInit();
              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init);
              EmitAutoVarCleanups(Emission);
              // Emit private VarDecl with reduction init.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   OASELValueLB.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
                  Ptr, OriginalBaseLValue.getPointer()->getType());
              return Address(Ptr, OriginalBaseLValue.getAlignment());
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto ASELValue = EmitLValue(ASE);
        auto OriginalBaseLValue = EmitLValue(DE);
        auto BaseLValue = OriginalBaseLValue;
        auto *Zero = Builder.getInt64(/*C=*/0);
        llvm::SmallVector<llvm::Value *, 4> Indexes;
        Indexes.push_back(Zero);
        auto *ItemTy =
            ASELValue.getPointer()->getType()->getPointerElementType();
        auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType();
        while (Ty != ItemTy) {
          Indexes.push_back(Zero);
          Ty = Ty->getPointerElementType();
        }
        BaseLValue = MakeAddrLValue(
            Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes),
                    ASELValue.getAlignment()),
            ASELValue.getType(), ASELValue.getAlignmentSource());
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
          return ASELValue.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, PrivateVD, BaseLValue, ASELValue,
                     OriginalBaseLValue]() -> Address {
              // Emit private VarDecl with reduction init.
              EmitDecl(*PrivateVD);
              auto Addr = GetAddrOfLocalVar(PrivateVD);
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   ASELValue.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
                  Ptr, OriginalBaseLValue.getPointer()->getType());
              return Address(Ptr, OriginalBaseLValue.getAlignment());
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else {
        auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          IRef->getType(), VK_LValue, IRef->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> Address {
              // Emit private VarDecl with reduction init.
              EmitDecl(*PrivateVD);
              return GetAddrOfLocalVar(PrivateVD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      }
      ++ILHS, ++IRHS, ++IPriv;
    }
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D) {
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    // Emit a nowait reduction if the nowait clause is present or the
    // directive is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        D.getSingleClause<OMPNowaitClause>() ||
            isOpenMPParallelDirective(D.getDirectiveKind()) ||
            D.getDirectiveKind() == OMPD_simd,
        D.getDirectiveKind() == OMPD_simd);
  }
}

static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                           const OMPExecutableDirective &S,
                                           OpenMPDirectiveKind InnermostKind,
                                           const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins || Firstprivates) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // on initialization of firstprivate variables, or on propagation of the
      // master thread's values of threadprivate variables to the local
      // instances of those variables in all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S);
    // Emit an implicit barrier at the end of the 'parallel' directive.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values for the current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
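  // For 'linear(l : step)' each iteration emits, roughly, 'l = init + IV *
  // step'; the exact update expressions come from Sema.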
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto U : C->updates()) {
      EmitIgnoredExpr(U);
    }
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  // TODO: Update lastprivates if the SeparateIter flag is true.
  // This will be implemented in a follow-up OMPLastprivateClause patch, but
  // the result should still be correct without it, as we do not make these
  // variables private yet.
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  // Emit inits for the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto Init : C->inits()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      auto *OrigVD = cast<VarDecl>(
          cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      VD->getInit()->getType(), VK_LValue,
                      VD->getInit()->getExprLoc());
      AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
      EmitExprAsInit(&DRE, VD,
                     MakeAddrLValue(Emission.getAllocatedAddress(),
                                    VD->getType()),
                     /*capturedByInit=*/false);
      EmitAutoVarCleanups(Emission);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
}

static void emitLinearClauseFinal(CodeGenFunction &CGF,
                                  const OMPLoopDirective &D) {
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto F : C->finals()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = CGF.EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(CGF);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      CGF.EmitIgnoredExpr(F);
      ++IC;
    }
  }
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

static void emitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters,
                                    ArrayRef<Expr *> PrivateCounters) {
  auto I = PrivateCounters.begin();
  for (auto *E : Counters) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    Address Addr = Address::invalid();
    (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
      // Emit var without initialization.
      auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD);
      CGF.EmitAutoVarCleanups(VarEmission);
      Addr = VarEmission.getAllocatedAddress();
      return Addr;
    });
    (void)LoopScope.addPrivate(VD, [&]() -> Address { return Addr; });
    ++I;
  }
}

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    emitPrivateLoopCounters(CGF, PreCondScope, S.counters(),
                            S.private_counters());
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (auto I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Check that the loop is executed at least once.
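  // TrueCount is the profile count used to weight this branch.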
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

static void
emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
                      CodeGenFunction::OMPPrivateScope &PrivateScope) {
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
        // Emit private VarDecl with copy init.
        CGF.EmitVarDecl(*PrivateVD);
        return CGF.GetAddrOfLocalVar(PrivateVD);
      });
      assert(IsRegistered && "linear var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++CurPrivate;
    }
  }
}

static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D) {
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(false);
  }
}

void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel();
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D);
}

void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) {
  auto IC = D.counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) {
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
  }
  emitLinearClauseFinal(*this, D);
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    CGF.EmitOMPLinearClauseInit(S);
    bool HasLastprivateClause;
    {
      OMPPrivateScope LoopScope(CGF);
      emitPrivateLoopCounters(CGF, LoopScope, S.counters(),
                              S.private_counters());
      emitPrivateLinearVars(CGF, S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      // Emit the final copy of the lastprivate variables at the end of the
      // loop.
      if (HasLastprivateClause) {
        CGF.EmitOMPLastprivateClauseFinal(S);
      }
      CGF.EmitOMPReductionClauseFinal(S);
    }
    CGF.EmitOMPSimdFinal(S);
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                          const OMPLoopDirective &S,
                                          OMPPrivateScope &LoopScope,
                                          bool Ordered, Address LB,
                                          Address UB, Address ST,
                                          Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind);

  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request
  // them. Each thread executes a chunk of iterations, then requests another
  // chunk, until no chunks remain to be distributed. Each chunk contains
  // chunk_size iterations, except for the last chunk to be distributed,
  // which may have fewer iterations. When no chunk_size is specified, it
  // defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are
  // assigned to threads in the team in chunks as the executing threads
  // request them. Each thread executes a chunk of iterations, then requests
  // another chunk, until no chunks remain to be assigned. For a chunk_size of
  // 1, the size of each chunk is proportional to the number of unassigned
  // iterations divided by the number of threads in the team, decreasing to 1.
  // For a chunk_size with value k (greater than 1), the size of each chunk is
  // determined in the same way, with the restriction that the chunks do not
  // contain fewer than k iterations (except for the last chunk to be
  // assigned, which may have fewer than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations
  // to threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the
  // threads in the team in a round-robin fashion in the order of the thread
  // number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
    RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind,
                           IVSize, IVSigned, Ordered, UBVal, Chunk);
  } else {
    RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
                         IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk);
  }

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond());
  } else {
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
                                 IL, LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated
  // the new LB for the loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without an ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
    LoopStack.setParallel((ScheduleKind == OMPC_SCHEDULE_dynamic ||
                           ScheduleKind == OMPC_SCHEDULE_guided) &&
                          !Ordered);
  } else {
    EmitOMPSimdInit(S);
  }

  SourceLocation Loc = S.getLocStart();
  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                   [&S, LoopExit](CodeGenFunction &CGF) {
                     CGF.EmitOMPLoopBody(S, LoopExit);
                     CGF.EmitStopPoint(&S);
                   },
                   [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
                     if (Ordered) {
                       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
                           CGF, Loc, IVSize, IVSigned);
                     }
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  if (!DynamicOrOrdered)
    RT.emitForStaticFinish(*this, S.getLocEnd());
}

/// \brief Emit a helper variable and return the corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

static std::pair<llvm::Value * /*Chunk*/, OpenMPScheduleClauseKind>
emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S,
                   bool OuterRegion) {
  // Detect the loop schedule kind and chunk.
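  // For example, '#pragma omp for schedule(dynamic, n)' yields
  // OMPC_SCHEDULE_dynamic plus the emitted value of 'n' converted to the
  // iteration variable's type; with no schedule clause, the kind stays
  // OMPC_SCHEDULE_unknown and the chunk stays null.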
  auto ScheduleKind = OMPC_SCHEDULE_unknown;
  llvm::Value *Chunk = nullptr;
  if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
    ScheduleKind = C->getScheduleKind();
    if (const auto *Ch = C->getChunkSize()) {
      if (auto *ImpRef = cast_or_null<DeclRefExpr>(C->getHelperChunkSize())) {
        if (OuterRegion) {
          const VarDecl *ImpVar = cast<VarDecl>(ImpRef->getDecl());
          CGF.EmitVarDecl(*ImpVar);
          CGF.EmitStoreThroughLValue(
              CGF.EmitAnyExpr(Ch),
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(ImpVar),
                                 ImpVar->getType()));
        } else {
          Ch = ImpRef;
        }
      }
      if (!C->getHelperChunkSize() || !OuterRegion) {
        Chunk = CGF.EmitScalarExpr(Ch);
        Chunk = CGF.EmitScalarConversion(Chunk, Ch->getType(),
                                         S.getIterationVariable()->getType(),
                                         S.getLocStart());
      }
    }
  }
  return std::make_pair(Chunk, ScheduleKind);
}

bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    EmitOMPLinearClauseInit(S);
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit an implicit barrier to synchronize threads and avoid data
        // races on initialization of firstprivate variables.
1489 CGM.getOpenMPRuntime().emitBarrierCall( 1490 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 1491 /*ForceSimpleCall=*/true); 1492 } 1493 EmitOMPPrivateClause(S, LoopScope); 1494 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 1495 EmitOMPReductionClauseInit(S, LoopScope); 1496 emitPrivateLoopCounters(*this, LoopScope, S.counters(), 1497 S.private_counters()); 1498 emitPrivateLinearVars(*this, S, LoopScope); 1499 (void)LoopScope.Privatize(); 1500 1501 // Detect the loop schedule kind and chunk. 1502 llvm::Value *Chunk; 1503 OpenMPScheduleClauseKind ScheduleKind; 1504 auto ScheduleInfo = 1505 emitScheduleClause(*this, S, /*OuterRegion=*/false); 1506 Chunk = ScheduleInfo.first; 1507 ScheduleKind = ScheduleInfo.second; 1508 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1509 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1510 const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr; 1511 if (RT.isStaticNonchunked(ScheduleKind, 1512 /* Chunked */ Chunk != nullptr) && 1513 !Ordered) { 1514 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 1515 EmitOMPSimdInit(S); 1516 } 1517 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1518 // When no chunk_size is specified, the iteration space is divided into 1519 // chunks that are approximately equal in size, and at most one chunk is 1520 // distributed to each thread. Note that the size of the chunks is 1521 // unspecified in this case. 1522 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, 1523 IVSize, IVSigned, Ordered, 1524 IL.getAddress(), LB.getAddress(), 1525 UB.getAddress(), ST.getAddress()); 1526 auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 1527 // UB = min(UB, GlobalUB); 1528 EmitIgnoredExpr(S.getEnsureUpperBound()); 1529 // IV = LB; 1530 EmitIgnoredExpr(S.getInit()); 1531 // while (idx <= UB) { BODY; ++idx; } 1532 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1533 S.getInc(), 1534 [&S, LoopExit](CodeGenFunction &CGF) { 1535 CGF.EmitOMPLoopBody(S, LoopExit); 1536 CGF.EmitStopPoint(&S); 1537 }, 1538 [](CodeGenFunction &) {}); 1539 EmitBlock(LoopExit.getBlock()); 1540 // Tell the runtime we are done. 1541 RT.emitForStaticFinish(*this, S.getLocStart()); 1542 } else { 1543 // Emit the outer loop, which requests its work chunk [LB..UB] from 1544 // runtime and runs the inner loop to process it. 1545 EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, Ordered, 1546 LB.getAddress(), UB.getAddress(), ST.getAddress(), 1547 IL.getAddress(), Chunk); 1548 } 1549 EmitOMPReductionClauseFinal(S); 1550 // Emit final copy of the lastprivate variables if IsLastIter != 0. 1551 if (HasLastprivateClause) 1552 EmitOMPLastprivateClauseFinal( 1553 S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 1554 } 1555 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 1556 EmitOMPSimdFinal(S); 1557 } 1558 // We're now done with the loop, so jump to the continuation block. 
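    // (ContBlock is null when the precondition constant-folded to true above,
    // in which case no guard blocks were created.)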
1559 if (ContBlock) { 1560 EmitBranch(ContBlock); 1561 EmitBlock(ContBlock, true); 1562 } 1563 } 1564 return HasLastprivateClause; 1565 } 1566 1567 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 1568 LexicalScope Scope(*this, S.getSourceRange()); 1569 bool HasLastprivates = false; 1570 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { 1571 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 1572 }; 1573 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 1574 S.hasCancel()); 1575 1576 // Emit an implicit barrier at the end. 1577 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 1578 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 1579 } 1580 } 1581 1582 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 1583 LexicalScope Scope(*this, S.getSourceRange()); 1584 bool HasLastprivates = false; 1585 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { 1586 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 1587 }; 1588 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1589 1590 // Emit an implicit barrier at the end. 1591 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 1592 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 1593 } 1594 } 1595 1596 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 1597 const Twine &Name, 1598 llvm::Value *Init = nullptr) { 1599 auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 1600 if (Init) 1601 CGF.EmitScalarInit(Init, LVal); 1602 return LVal; 1603 } 1604 1605 OpenMPDirectiveKind 1606 CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 1607 auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); 1608 auto *CS = dyn_cast<CompoundStmt>(Stmt); 1609 if (CS && CS->size() > 1) { 1610 bool HasLastprivates = false; 1611 auto &&CodeGen = [&S, CS, &HasLastprivates](CodeGenFunction &CGF) { 1612 auto &C = CGF.CGM.getContext(); 1613 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1614 // Emit helper vars inits. 1615 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 1616 CGF.Builder.getInt32(0)); 1617 auto *GlobalUBVal = CGF.Builder.getInt32(CS->size() - 1); 1618 LValue UB = 1619 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 1620 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 1621 CGF.Builder.getInt32(1)); 1622 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 1623 CGF.Builder.getInt32(0)); 1624 // Loop counter. 1625 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 1626 OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 1627 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 1628 OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 1629 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 1630 // Generate condition for loop. 1631 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 1632 OK_Ordinary, S.getLocStart(), 1633 /*fpContractable=*/false); 1634 // Increment for loop counter. 1635 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, 1636 OK_Ordinary, S.getLocStart()); 1637 auto BodyGen = [CS, &S, &IV](CodeGenFunction &CGF) { 1638 // Iterate through all sections and emit a switch construct: 1639 // switch (IV) { 1640 // case 0: 1641 // <SectionStmt[0]>; 1642 // break; 1643 // ... 
        // case <NumSection> - 1:
        //   <SectionStmt[<NumSection> - 1]>;
        //   break;
        // }
        // .omp.sections.exit:
        auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
        auto *SwitchStmt = CGF.Builder.CreateSwitch(
            CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
            CS->size());
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
        CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
      };

      CodeGenFunction::OMPPrivateScope LoopScope(CGF);
      if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables.
        CGF.CGM.getOpenMPRuntime().emitBarrierCall(
            CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      CGF.EmitOMPPrivateClause(S, LoopScope);
      HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();

      // Emit static non-chunked loop.
      CGF.CGM.getOpenMPRuntime().emitForStaticInit(
          CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
          /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
          LB.getAddress(), UB.getAddress(), ST.getAddress());
      // UB = min(UB, GlobalUB);
      auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
      auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
          CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
      CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
      // IV = LB;
      CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
      // while (idx <= UB) { BODY; ++idx; }
      CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                           [](CodeGenFunction &) {});
      // Tell the runtime we are done.
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart());
      CGF.EmitOMPReductionClauseFinal(S);

      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivates)
        CGF.EmitOMPLastprivateClauseFinal(
            S, CGF.Builder.CreateIsNotNull(
                   CGF.EmitLoadOfScalar(IL, S.getLocStart())));
    };

    bool HasCancel = false;
    if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
      HasCancel = OSD->hasCancel();
    else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
      HasCancel = OPSD->hasCancel();
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                                HasCancel);
    // Emit barrier for lastprivates only if the 'sections' directive has a
    // 'nowait' clause. Otherwise the barrier will be generated by the codegen
    // for the directive.
    if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // the final copying of lastprivate variables.
      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                             OMPD_unknown);
    }
    return OMPD_sections;
  }
  // If only one section is found, there is no need to generate a loop; emit
  // it as a single region.
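  // For example, '#pragma omp sections' whose body contains a single
  // '#pragma omp section' (or a single statement) is emitted below like a
  // 'single' region, and OMPD_single is returned so the caller emits the
  // matching barrier.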
  bool HasFirstprivates;
  // No need to generate reductions for sections with a single section
  // region: we can use the original shared variables for all operations.
  bool HasReductions = S.hasClausesOfKind<OMPReductionClause>();
  // No need to generate lastprivates for sections with a single section
  // region either: we can use the original shared variable for all
  // calculations, with a barrier at the end of the sections.
  bool HasLastprivates = S.hasClausesOfKind<OMPLastprivateClause>();
  auto &&CodeGen = [Stmt, &S, &HasFirstprivates](CodeGenFunction &CGF) {
    CodeGenFunction::OMPPrivateScope SingleScope(CGF);
    HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();

    CGF.EmitStmt(Stmt);
  };
  CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                          llvm::None, llvm::None, llvm::None,
                                          llvm::None);
  // Emit a barrier for firstprivates, lastprivates or reductions only if the
  // 'sections' directive has a 'nowait' clause. Otherwise the barrier will be
  // generated by the codegen for the directive.
  if ((HasFirstprivates || HasLastprivates || HasReductions) &&
      S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_unknown,
                                           /*EmitChecks=*/false,
                                           /*ForceSimpleCall=*/true);
  }
  return OMPD_single;
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  OpenMPDirectiveKind EmittedAs = EmitSections(S);
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs);
  }
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
                                              S.hasCancel());
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
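  // (e.g. '#pragma omp single copyprivate(a)' broadcasts the value of 'a'
  // from the thread that executed the single region to all other threads in
  // the team.)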
1784 // Build a list of copyprivate variables along with helper expressions 1785 // (<source>, <destination>, <destination>=<source> expressions) 1786 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 1787 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 1788 DestExprs.append(C->destination_exprs().begin(), 1789 C->destination_exprs().end()); 1790 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 1791 AssignmentOps.append(C->assignment_ops().begin(), 1792 C->assignment_ops().end()); 1793 } 1794 LexicalScope Scope(*this, S.getSourceRange()); 1795 // Emit code for 'single' region along with 'copyprivate' clauses 1796 bool HasFirstprivates; 1797 auto &&CodeGen = [&S, &HasFirstprivates](CodeGenFunction &CGF) { 1798 CodeGenFunction::OMPPrivateScope SingleScope(CGF); 1799 HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope); 1800 CGF.EmitOMPPrivateClause(S, SingleScope); 1801 (void)SingleScope.Privatize(); 1802 1803 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1804 CGF.EnsureInsertPoint(); 1805 }; 1806 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 1807 CopyprivateVars, DestExprs, SrcExprs, 1808 AssignmentOps); 1809 // Emit an implicit barrier at the end (to avoid data race on firstprivate 1810 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 1811 if ((!S.getSingleClause<OMPNowaitClause>() || HasFirstprivates) && 1812 CopyprivateVars.empty()) { 1813 CGM.getOpenMPRuntime().emitBarrierCall( 1814 *this, S.getLocStart(), 1815 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); 1816 } 1817 } 1818 1819 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 1820 LexicalScope Scope(*this, S.getSourceRange()); 1821 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1822 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1823 CGF.EnsureInsertPoint(); 1824 }; 1825 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 1826 } 1827 1828 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 1829 LexicalScope Scope(*this, S.getSourceRange()); 1830 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1831 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1832 CGF.EnsureInsertPoint(); 1833 }; 1834 CGM.getOpenMPRuntime().emitCriticalRegion( 1835 *this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart()); 1836 } 1837 1838 void CodeGenFunction::EmitOMPParallelForDirective( 1839 const OMPParallelForDirective &S) { 1840 // Emit directive as a combined directive that consists of two implicit 1841 // directives: 'parallel' with 'for' directive. 1842 LexicalScope Scope(*this, S.getSourceRange()); 1843 (void)emitScheduleClause(*this, S, /*OuterRegion=*/true); 1844 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1845 CGF.EmitOMPWorksharingLoop(S); 1846 // Emit implicit barrier at the end of parallel region, but this barrier 1847 // is at the end of 'for' directive, so emit it as the implicit barrier for 1848 // this 'for' directive. 
1849 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 1850 CGF, S.getLocStart(), OMPD_parallel, /*EmitChecks=*/false, 1851 /*ForceSimpleCall=*/true); 1852 }; 1853 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); 1854 } 1855 1856 void CodeGenFunction::EmitOMPParallelForSimdDirective( 1857 const OMPParallelForSimdDirective &S) { 1858 // Emit directive as a combined directive that consists of two implicit 1859 // directives: 'parallel' with 'for' directive. 1860 LexicalScope Scope(*this, S.getSourceRange()); 1861 (void)emitScheduleClause(*this, S, /*OuterRegion=*/true); 1862 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1863 CGF.EmitOMPWorksharingLoop(S); 1864 // Emit implicit barrier at the end of parallel region, but this barrier 1865 // is at the end of 'for' directive, so emit it as the implicit barrier for 1866 // this 'for' directive. 1867 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 1868 CGF, S.getLocStart(), OMPD_parallel, /*EmitChecks=*/false, 1869 /*ForceSimpleCall=*/true); 1870 }; 1871 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); 1872 } 1873 1874 void CodeGenFunction::EmitOMPParallelSectionsDirective( 1875 const OMPParallelSectionsDirective &S) { 1876 // Emit directive as a combined directive that consists of two implicit 1877 // directives: 'parallel' with 'sections' directive. 1878 LexicalScope Scope(*this, S.getSourceRange()); 1879 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1880 (void)CGF.EmitSections(S); 1881 // Emit implicit barrier at the end of parallel region. 1882 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 1883 CGF, S.getLocStart(), OMPD_parallel, /*EmitChecks=*/false, 1884 /*ForceSimpleCall=*/true); 1885 }; 1886 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); 1887 } 1888 1889 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 1890 // Emit outlined function for task construct. 1891 LexicalScope Scope(*this, S.getSourceRange()); 1892 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 1893 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 1894 auto *I = CS->getCapturedDecl()->param_begin(); 1895 auto *PartId = std::next(I); 1896 // The first function argument for tasks is a thread id, the second one is a 1897 // part id (0 for tied tasks, >=0 for untied task). 1898 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 1899 // Get list of private variables. 1900 llvm::SmallVector<const Expr *, 8> PrivateVars; 1901 llvm::SmallVector<const Expr *, 8> PrivateCopies; 1902 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 1903 auto IRef = C->varlist_begin(); 1904 for (auto *IInit : C->private_copies()) { 1905 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 1906 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 1907 PrivateVars.push_back(*IRef); 1908 PrivateCopies.push_back(IInit); 1909 } 1910 ++IRef; 1911 } 1912 } 1913 EmittedAsPrivate.clear(); 1914 // Get list of firstprivate variables. 
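  // (e.g. for '#pragma omp task firstprivate(a)' this records 'a', its
  // private copy, and the initializer that captures the value of 'a' at the
  // point the task is created.)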
1915 llvm::SmallVector<const Expr *, 8> FirstprivateVars; 1916 llvm::SmallVector<const Expr *, 8> FirstprivateCopies; 1917 llvm::SmallVector<const Expr *, 8> FirstprivateInits; 1918 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 1919 auto IRef = C->varlist_begin(); 1920 auto IElemInitRef = C->inits().begin(); 1921 for (auto *IInit : C->private_copies()) { 1922 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 1923 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 1924 FirstprivateVars.push_back(*IRef); 1925 FirstprivateCopies.push_back(IInit); 1926 FirstprivateInits.push_back(*IElemInitRef); 1927 } 1928 ++IRef, ++IElemInitRef; 1929 } 1930 } 1931 // Build list of dependences. 1932 llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8> 1933 Dependences; 1934 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { 1935 for (auto *IRef : C->varlists()) { 1936 Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); 1937 } 1938 } 1939 auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars]( 1940 CodeGenFunction &CGF) { 1941 // Set proper addresses for generated private copies. 1942 auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); 1943 OMPPrivateScope Scope(CGF); 1944 if (!PrivateVars.empty() || !FirstprivateVars.empty()) { 1945 auto *CopyFn = CGF.Builder.CreateLoad( 1946 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); 1947 auto *PrivatesPtr = CGF.Builder.CreateLoad( 1948 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); 1949 // Map privates. 1950 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> 1951 PrivatePtrs; 1952 llvm::SmallVector<llvm::Value *, 16> CallArgs; 1953 CallArgs.push_back(PrivatesPtr); 1954 for (auto *E : PrivateVars) { 1955 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1956 Address PrivatePtr = 1957 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); 1958 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 1959 CallArgs.push_back(PrivatePtr.getPointer()); 1960 } 1961 for (auto *E : FirstprivateVars) { 1962 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1963 Address PrivatePtr = 1964 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); 1965 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 1966 CallArgs.push_back(PrivatePtr.getPointer()); 1967 } 1968 CGF.EmitRuntimeCall(CopyFn, CallArgs); 1969 for (auto &&Pair : PrivatePtrs) { 1970 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 1971 CGF.getContext().getDeclAlign(Pair.first)); 1972 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 1973 } 1974 } 1975 (void)Scope.Privatize(); 1976 if (*PartId) { 1977 // TODO: emit code for untied tasks. 1978 } 1979 CGF.EmitStmt(CS->getCapturedStmt()); 1980 }; 1981 auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 1982 S, *I, OMPD_task, CodeGen); 1983 // Check if we should emit tied or untied task. 1984 bool Tied = !S.getSingleClause<OMPUntiedClause>(); 1985 // Check if the task is final 1986 llvm::PointerIntPair<llvm::Value *, 1, bool> Final; 1987 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 1988 // If the condition constant folds and can be elided, try to avoid emitting 1989 // the condition and the dead arm of the if/else. 
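    // (e.g. 'final(1)' folds to a constant true, so every generated task is
    // final and no runtime evaluation of the condition is needed.)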
1990 auto *Cond = Clause->getCondition(); 1991 bool CondConstant; 1992 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 1993 Final.setInt(CondConstant); 1994 else 1995 Final.setPointer(EvaluateExprAsBool(Cond)); 1996 } else { 1997 // By default the task is not final. 1998 Final.setInt(/*IntVal=*/false); 1999 } 2000 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 2001 const Expr *IfCond = nullptr; 2002 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2003 if (C->getNameModifier() == OMPD_unknown || 2004 C->getNameModifier() == OMPD_task) { 2005 IfCond = C->getCondition(); 2006 break; 2007 } 2008 } 2009 CGM.getOpenMPRuntime().emitTaskCall( 2010 *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy, 2011 CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars, 2012 FirstprivateCopies, FirstprivateInits, Dependences); 2013 } 2014 2015 void CodeGenFunction::EmitOMPTaskyieldDirective( 2016 const OMPTaskyieldDirective &S) { 2017 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); 2018 } 2019 2020 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 2021 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); 2022 } 2023 2024 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 2025 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart()); 2026 } 2027 2028 void CodeGenFunction::EmitOMPTaskgroupDirective( 2029 const OMPTaskgroupDirective &S) { 2030 LexicalScope Scope(*this, S.getSourceRange()); 2031 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2032 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2033 CGF.EnsureInsertPoint(); 2034 }; 2035 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); 2036 } 2037 2038 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 2039 CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { 2040 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) { 2041 return llvm::makeArrayRef(FlushClause->varlist_begin(), 2042 FlushClause->varlist_end()); 2043 } 2044 return llvm::None; 2045 }(), S.getLocStart()); 2046 } 2047 2048 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 2049 const CapturedStmt *S) { 2050 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 2051 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 2052 CGF.CapturedStmtInfo = &CapStmtInfo; 2053 auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); 2054 Fn->addFnAttr(llvm::Attribute::NoInline); 2055 return Fn; 2056 } 2057 2058 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 2059 LexicalScope Scope(*this, S.getSourceRange()); 2060 auto *C = S.getSingleClause<OMPSIMDClause>(); 2061 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) { 2062 if (C) { 2063 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2064 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 2065 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 2066 auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); 2067 CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); 2068 } else { 2069 CGF.EmitStmt( 2070 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2071 } 2072 CGF.EnsureInsertPoint(); 2073 }; 2074 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); 2075 } 2076 2077 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 2078 QualType SrcType, QualType DestType, 2079 
SourceLocation Loc) { 2080 assert(CGF.hasScalarEvaluationKind(DestType) && 2081 "DestType must have scalar evaluation kind."); 2082 assert(!Val.isAggregate() && "Must be a scalar or complex."); 2083 return Val.isScalar() 2084 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, 2085 Loc) 2086 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 2087 DestType, Loc); 2088 } 2089 2090 static CodeGenFunction::ComplexPairTy 2091 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 2092 QualType DestType, SourceLocation Loc) { 2093 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 2094 "DestType must have complex evaluation kind."); 2095 CodeGenFunction::ComplexPairTy ComplexVal; 2096 if (Val.isScalar()) { 2097 // Convert the input element to the element type of the complex. 2098 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 2099 auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 2100 DestElementType, Loc); 2101 ComplexVal = CodeGenFunction::ComplexPairTy( 2102 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 2103 } else { 2104 assert(Val.isComplex() && "Must be a scalar or complex."); 2105 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 2106 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 2107 ComplexVal.first = CGF.EmitScalarConversion( 2108 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 2109 ComplexVal.second = CGF.EmitScalarConversion( 2110 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 2111 } 2112 return ComplexVal; 2113 } 2114 2115 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 2116 LValue LVal, RValue RVal) { 2117 if (LVal.isGlobalReg()) { 2118 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 2119 } else { 2120 CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? llvm::SequentiallyConsistent 2121 : llvm::Monotonic, 2122 LVal.isVolatile(), /*IsInit=*/false); 2123 } 2124 } 2125 2126 static void emitSimpleStore(CodeGenFunction &CGF, LValue LVal, RValue RVal, 2127 QualType RValTy, SourceLocation Loc) { 2128 switch (CGF.getEvaluationKind(LVal.getType())) { 2129 case TEK_Scalar: 2130 CGF.EmitStoreThroughLValue(RValue::get(convertToScalarValue( 2131 CGF, RVal, RValTy, LVal.getType(), Loc)), 2132 LVal); 2133 break; 2134 case TEK_Complex: 2135 CGF.EmitStoreOfComplex( 2136 convertToComplexValue(CGF, RVal, RValTy, LVal.getType(), Loc), LVal, 2137 /*isInit=*/false); 2138 break; 2139 case TEK_Aggregate: 2140 llvm_unreachable("Must be a scalar or complex."); 2141 } 2142 } 2143 2144 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 2145 const Expr *X, const Expr *V, 2146 SourceLocation Loc) { 2147 // v = x; 2148 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 2149 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 2150 LValue XLValue = CGF.EmitLValue(X); 2151 LValue VLValue = CGF.EmitLValue(V); 2152 RValue Res = XLValue.isGlobalReg() 2153 ? CGF.EmitLoadOfLValue(XLValue, Loc) 2154 : CGF.EmitAtomicLoad(XLValue, Loc, 2155 IsSeqCst ? llvm::SequentiallyConsistent 2156 : llvm::Monotonic, 2157 XLValue.isVolatile()); 2158 // OpenMP, 2.12.6, atomic Construct 2159 // Any atomic construct with a seq_cst clause forces the atomically 2160 // performed operation to include an implicit flush operation without a 2161 // list. 
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  emitSimpleStore(CGF, VLValue, Res, X->getType().getNonReferenceType(), Loc);
}

static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomic operations are supported for
  // the given type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  // '<' and '>' updates are emitted as atomic min/max operations; the exact
  // operation depends on the signedness of 'x' and on which side of the
  // comparison 'x' appears.
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
        return CGF.EmitAnyExpr(UE);
      };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval binop expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr;
    // x = expr Op x; -> expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide the new value, so evaluate it using
        // the old value of 'x'.
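        // (atomicrmw returns the value 'x' had before the operation, so
        // re-evaluating the update expression with that old value
        // reconstructs the value that was stored.)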
2398 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 2399 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 2400 NewVVal = CGF.EmitAnyExpr(UE); 2401 } 2402 } 2403 } else { 2404 // 'x' is simply rewritten with some 'expr'. 2405 NewVValType = X->getType().getNonReferenceType(); 2406 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 2407 X->getType().getNonReferenceType(), Loc); 2408 auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue { 2409 NewVVal = XRValue; 2410 return ExprRValue; 2411 }; 2412 // Try to perform atomicrmw xchg, otherwise simple exchange. 2413 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 2414 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 2415 Loc, Gen); 2416 if (Res.first) { 2417 // 'atomicrmw' instruction was generated. 2418 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 2419 } 2420 } 2421 // Emit post-update store to 'v' of old/new 'x' value. 2422 emitSimpleStore(CGF, VLValue, NewVVal, NewVValType, Loc); 2423 // OpenMP, 2.12.6, atomic Construct 2424 // Any atomic construct with a seq_cst clause forces the atomically 2425 // performed operation to include an implicit flush operation without a 2426 // list. 2427 if (IsSeqCst) 2428 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2429 } 2430 2431 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 2432 bool IsSeqCst, bool IsPostfixUpdate, 2433 const Expr *X, const Expr *V, const Expr *E, 2434 const Expr *UE, bool IsXLHSInRHSPart, 2435 SourceLocation Loc) { 2436 switch (Kind) { 2437 case OMPC_read: 2438 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 2439 break; 2440 case OMPC_write: 2441 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 2442 break; 2443 case OMPC_unknown: 2444 case OMPC_update: 2445 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 2446 break; 2447 case OMPC_capture: 2448 EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 2449 IsXLHSInRHSPart, Loc); 2450 break; 2451 case OMPC_if: 2452 case OMPC_final: 2453 case OMPC_num_threads: 2454 case OMPC_private: 2455 case OMPC_firstprivate: 2456 case OMPC_lastprivate: 2457 case OMPC_reduction: 2458 case OMPC_safelen: 2459 case OMPC_simdlen: 2460 case OMPC_collapse: 2461 case OMPC_default: 2462 case OMPC_seq_cst: 2463 case OMPC_shared: 2464 case OMPC_linear: 2465 case OMPC_aligned: 2466 case OMPC_copyin: 2467 case OMPC_copyprivate: 2468 case OMPC_flush: 2469 case OMPC_proc_bind: 2470 case OMPC_schedule: 2471 case OMPC_ordered: 2472 case OMPC_nowait: 2473 case OMPC_untied: 2474 case OMPC_threadprivate: 2475 case OMPC_depend: 2476 case OMPC_mergeable: 2477 case OMPC_device: 2478 case OMPC_threads: 2479 case OMPC_simd: 2480 case OMPC_map: 2481 case OMPC_num_teams: 2482 case OMPC_thread_limit: 2483 case OMPC_priority: 2484 case OMPC_grainsize: 2485 case OMPC_nogroup: 2486 case OMPC_num_tasks: 2487 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 2488 } 2489 } 2490 2491 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 2492 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); 2493 OpenMPClauseKind Kind = OMPC_unknown; 2494 for (auto *C : S.clauses()) { 2495 // Find first clause (skip seq_cst clause, if it is first). 
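    // (e.g. for '#pragma omp atomic capture seq_cst' this selects
    // OMPC_capture; if no clause is present, Kind stays OMPC_unknown and is
    // treated like 'update' when the expression is emitted.)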
2496 if (C->getClauseKind() != OMPC_seq_cst) { 2497 Kind = C->getClauseKind(); 2498 break; 2499 } 2500 } 2501 2502 const auto *CS = 2503 S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 2504 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) { 2505 enterFullExpression(EWC); 2506 } 2507 // Processing for statements under 'atomic capture'. 2508 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 2509 for (const auto *C : Compound->body()) { 2510 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) { 2511 enterFullExpression(EWC); 2512 } 2513 } 2514 } 2515 2516 LexicalScope Scope(*this, S.getSourceRange()); 2517 auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) { 2518 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), 2519 S.getV(), S.getExpr(), S.getUpdateExpr(), 2520 S.isXLHSInRHSPart(), S.getLocStart()); 2521 }; 2522 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); 2523 } 2524 2525 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 2526 LexicalScope Scope(*this, S.getSourceRange()); 2527 const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); 2528 2529 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 2530 GenerateOpenMPCapturedVars(CS, CapturedVars); 2531 2532 // Emit target region as a standalone region. 2533 auto &&CodeGen = [&CS](CodeGenFunction &CGF) { 2534 CGF.EmitStmt(CS.getCapturedStmt()); 2535 }; 2536 2537 // Obtain the target region outlined function. 2538 llvm::Value *Fn = 2539 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, CodeGen); 2540 2541 // Check if we have any if clause associated with the directive. 2542 const Expr *IfCond = nullptr; 2543 2544 if (auto *C = S.getSingleClause<OMPIfClause>()) { 2545 IfCond = C->getCondition(); 2546 } 2547 2548 // Check if we have any device clause associated with the directive. 2549 const Expr *Device = nullptr; 2550 if (auto *C = S.getSingleClause<OMPDeviceClause>()) { 2551 Device = C->getDevice(); 2552 } 2553 2554 CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, IfCond, Device, 2555 CapturedVars); 2556 } 2557 2558 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) { 2559 llvm_unreachable("CodeGen for 'omp teams' is not supported yet."); 2560 } 2561 2562 void CodeGenFunction::EmitOMPCancellationPointDirective( 2563 const OMPCancellationPointDirective &S) { 2564 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), 2565 S.getCancelRegion()); 2566 } 2567 2568 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 2569 const Expr *IfCond = nullptr; 2570 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2571 if (C->getNameModifier() == OMPD_unknown || 2572 C->getNameModifier() == OMPD_cancel) { 2573 IfCond = C->getCondition(); 2574 break; 2575 } 2576 } 2577 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, 2578 S.getCancelRegion()); 2579 } 2580 2581 CodeGenFunction::JumpDest 2582 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 2583 if (Kind == OMPD_parallel || Kind == OMPD_task) 2584 return ReturnBlock; 2585 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 2586 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for); 2587 return BreakContinueStack.back().BreakBlock; 2588 } 2589 2590 // Generate the instructions for '#pragma omp target data' directive. 
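// (e.g. '#pragma omp target data map(tofrom: a)': at this stage only the
// associated statement is emitted inline; the 'map' clauses themselves are
// not yet lowered here.)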
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  // For now, just emit the code inside the construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_data,
      [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
}

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  // For now, just emit the code inside the construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_taskloop,
      [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  // For now, just emit the code inside the construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_taskloop_simd,
      [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
}