//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
using namespace clang;
using namespace CodeGen;

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars,
    bool UseOnlyReferences) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      // If we need to use only references, create a temporary location for the
      // size of the VAT.
      if (UseOnlyReferences) {
        LValue LV =
            MakeAddrLValue(CreateMemTemp(CurField->getType(), "__vla_size_ref"),
                           CurField->getType());
        EmitStoreThroughLValue(RValue::get(Val), LV);
        Val = LV.getAddress().getPointer();
      }
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
  }
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    bool UseOnlyReferences) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  const RecordDecl *RD = S.getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  // Build the argument list.
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList Args;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;
    if (I->capturesVariable()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &getContext().Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &getContext().Idents.get("vla");
      if (UseOnlyReferences)
        ArgType = getContext().getLValueReferenceType(
            ArgType, /*SpelledAsLValue=*/false);
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getContext().getVariableArrayDecayedType(ArgType);
    Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
                                             FD->getLocation(), II, ArgType));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
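  // Illustrative sketch (names are not literal): for '#pragma omp parallel'
  // capturing 'int a' and one VLA bound, the helper created below looks
  // roughly like
  //   define internal void @.omp_outlined.(i32* %gtid, i32* %btid,
  //                                        i32* %a, i64 %vla)
  // with internal linkage so it stays private to this module.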
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo,
                                                    /*IsVariadic=*/false);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F = llvm::Function::Create(
      FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
      CapturedStmtInfo->getHelperName(), &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // Generate the function.
  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    LValue ArgLVal =
        MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
                       AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (UseOnlyReferences)
        ArgLVal = EmitLoadOfReferenceLValue(
            ArgLVal.getAddress(), ArgLVal.getType()->castAs<ReferenceType>());
      auto *ExprArg =
          EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizeMap[VAT->getSizeExpr()] = ExprArg;
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        ArgAddr = EmitLoadOfReference(
            ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
      }
      setAddrOfLocalVar(
          Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue =
          EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
    }
    ++Cnt, ++I;
  }

  PGO.assignRegionCounters(CD, F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());

  return F;
}

//===----------------------------------------------------------------------===//
//                           OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
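  // Sketch of the IR emitted below (block names as created above):
  //   omp.arraycpy.body:
  //     %src = phi [ SrcBegin, entry ], [ %src.next, body ]
  //     %dest = phi [ DestBegin, entry ], [ %dest.next, body ]
  //     <CopyGen(dest, src)>
  //     %dest.next = getelementptr %dest, 1
  //     %done = icmp eq %dest.next, DestEnd
  //     br %done, omp.arraycpy.done, omp.arraycpy.body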
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// \brief Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, const Expr *Init) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());

  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                         /*IsInitializer=*/false);
  }

  // Shift the address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsFirstprivate.count(OrigVD) == 0) {
        EmittedAsFirstprivate.insert(OrigVD);
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = OrigVD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
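              // For trivially copyable arrays this lowers to a single
              // llvm.memcpy over the whole aggregate; the element-by-element
              // path below is only needed when a real copy constructor must
              // run per element.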
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap the temp VDInit variable to the address of the original
            // variable (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef, ++InitsRef;
    }
  }
  return !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {

        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master thread as a field
        // in the captured declaration.
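        // Illustrative example:
        //   int x;
        //   #pragma omp threadprivate(x)
        //   #pragma omp parallel copyin(x)
        // Every non-master thread copies the master thread's 'x' into its
        // own threadprivate instance before executing the region body.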
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If it
          // is, there is no need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (IInit) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [&]() -> Address {
                // Emit private VarDecl with copy init.
                EmitDecl(*VD);
                return GetAddrOfLocalVar(VD);
              });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef, ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates;
  const Expr *LastIterVal = nullptr;
  const Expr *IVExpr = nullptr;
  const Expr *IncExpr = nullptr;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    if (isOpenMPWorksharingDirective(D.getDirectiveKind())) {
      LastIterVal = cast<VarDecl>(cast<DeclRefExpr>(
                                      LoopDirective->getUpperBoundVariable())
                                      ->getDecl())
                        ->getAnyInitializer();
      IVExpr = LoopDirective->getIterationVariable();
      IncExpr = LoopDirective->getInc();
      auto IUpdate = LoopDirective->updates().begin();
      for (auto *E : LoopDirective->counters()) {
        auto *D = cast<DeclRefExpr>(E)->getDecl()->getCanonicalDecl();
        LoopCountersAndUpdates[D] = *IUpdate;
        ++IUpdate;
      }
    }
  }
  {
    llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
    bool FirstLCV = true;
    for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
      auto IRef = C->varlist_begin();
      auto ISrcRef = C->source_exprs().begin();
      auto IDestRef = C->destination_exprs().begin();
      for (auto *AssignOp : C->assignment_ops()) {
        auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        auto *CanonicalVD = PrivateVD->getCanonicalDecl();
        if (AlreadyEmittedVars.insert(CanonicalVD).second) {
          // If the lastprivate variable is a loop control variable of a
          // loop-based directive, update its value before copying it back to
          // the original variable.
          if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) {
            if (FirstLCV && LastIterVal) {
              EmitAnyExprToMem(LastIterVal, EmitLValue(IVExpr).getAddress(),
                               IVExpr->getType().getQualifiers(),
                               /*IsInitializer=*/false);
              EmitIgnoredExpr(IncExpr);
              FirstLCV = false;
            }
            EmitIgnoredExpr(UpExpr);
          }
          auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
          auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
          // Get the address of the original variable.
          Address OriginalAddr = GetAddrOfLocalVar(DestVD);
          // Get the address of the private variable.
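          // Illustrative example: for '#pragma omp for lastprivate(x)', the
          // thread that executed the sequentially last iteration copies its
          // private 'x' back into the original variable here.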
          Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
          if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
            PrivateAddr =
                Address(Builder.CreateLoad(PrivateAddr),
                        getNaturalTypeAlignment(RefTy->getPointeeType()));
          EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
        }
        ++IRef;
        ++ISrcRef;
        ++IDestRef;
      }
    }
  }
  if (IsLastIterCond) {
    EmitBlock(DoneBB, /*IsFinished=*/true);
  }
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    auto IPriv = C->privates().begin();
    for (auto IRef : C->varlists()) {
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
          Base = TempOASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
        auto OASELValueUB =
            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
        auto OriginalBaseLValue = EmitLValue(DE);
        auto BaseLValue = OriginalBaseLValue;
        auto *Zero = Builder.getInt64(/*C=*/0);
        llvm::SmallVector<llvm::Value *, 4> Indexes;
        Indexes.push_back(Zero);
        auto *ItemTy =
            OASELValueLB.getPointer()->getType()->getPointerElementType();
        auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType();
        while (Ty != ItemTy) {
          Indexes.push_back(Zero);
          Ty = Ty->getPointerElementType();
        }
        BaseLValue = MakeAddrLValue(
            Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes),
                    OASELValueLB.getAlignment()),
            OASELValueLB.getType(), OASELValueLB.getAlignmentSource());
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
          return OASELValueLB.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, PrivateVD, BaseLValue, OASELValueLB, OASELValueUB,
                     OriginalBaseLValue]() -> Address {
              // Emit VarDecl with copy init for arrays.
              // Get the address of the original variable captured in current
              // captured region.
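              // The private copy only needs to cover the section [LB, UB],
              // i.e. UB - LB + 1 elements; that count is bound below as the
              // VLA size expression of the private variable's variably
              // modified type.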
              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
                                                 OASELValueLB.getPointer());
              Size = Builder.CreateNUWAdd(
                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(Size));
              EmitVariablyModifiedType(PrivateVD->getType());
              auto Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              auto *Init = PrivateVD->getInit();
              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init);
              EmitAutoVarCleanups(Emission);
              // Emit private VarDecl with reduction init.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   OASELValueLB.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
                  Ptr, OriginalBaseLValue.getPointer()->getType());
              return Address(Ptr, OriginalBaseLValue.getAlignment());
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto ASELValue = EmitLValue(ASE);
        auto OriginalBaseLValue = EmitLValue(DE);
        auto BaseLValue = OriginalBaseLValue;
        auto *Zero = Builder.getInt64(/*C=*/0);
        llvm::SmallVector<llvm::Value *, 4> Indexes;
        Indexes.push_back(Zero);
        auto *ItemTy =
            ASELValue.getPointer()->getType()->getPointerElementType();
        auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType();
        while (Ty != ItemTy) {
          Indexes.push_back(Zero);
          Ty = Ty->getPointerElementType();
        }
        BaseLValue = MakeAddrLValue(
            Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes),
                    ASELValue.getAlignment()),
            ASELValue.getType(), ASELValue.getAlignmentSource());
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
          return ASELValue.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, PrivateVD, BaseLValue, ASELValue,
                     OriginalBaseLValue]() -> Address {
              // Emit private VarDecl with reduction init.
              EmitDecl(*PrivateVD);
              auto Addr = GetAddrOfLocalVar(PrivateVD);
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   ASELValue.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
                  Ptr, OriginalBaseLValue.getPointer()->getType());
              return Address(Ptr, OriginalBaseLValue.getAlignment());
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else {
        auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          IRef->getType(), VK_LValue, IRef->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> Address {
              // Emit private VarDecl with reduction init.
              EmitDecl(*PrivateVD);
              return GetAddrOfLocalVar(PrivateVD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      }
      ++ILHS, ++IRHS, ++IPriv;
    }
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D) {
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has an implicit barrier).
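    // Illustrative example: '#pragma omp parallel reduction(+:s)' reaches
    // this point with one <private, lhs, rhs, op> tuple for 's'; the runtime
    // call combines each thread's partial result into the original 's'.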
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        D.getSingleClause<OMPNowaitClause>() ||
            isOpenMPParallelDirective(D.getDirectiveKind()) ||
            D.getDirectiveKind() == OMPD_simd,
        D.getDirectiveKind() == OMPD_simd);
  }
}

static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                           const OMPExecutableDirective &S,
                                           OpenMPDirectiveKind InnermostKind,
                                           const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins || Firstprivates) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // on initialization of firstprivate variables, or on propagation of the
      // master thread's values of threadprivate variables to the local
      // instances of those variables in all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S);
    // Emit implicit barrier at the end of the 'parallel' directive.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values for the current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
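  // Illustrative example: for '#pragma omp simd linear(p:2)', each iteration
  // recomputes 'p' from its initial value plus IV * 2 via the update
  // expressions prebuilt by Sema.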
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto U : C->updates()) {
      EmitIgnoredExpr(U);
    }
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  // TODO: Update lastprivates if the SeparateIter flag is true.
  // This will be implemented in a follow-up OMPLastprivateClause patch, but
  // result should be still correct without it, as we do not make these
  // variables private yet.
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  // Emit inits for the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto Init : C->inits()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      auto *OrigVD = cast<VarDecl>(
          cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      VD->getInit()->getType(), VK_LValue,
                      VD->getInit()->getExprLoc());
      AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
      EmitExprAsInit(&DRE, VD,
                     MakeAddrLValue(Emission.getAllocatedAddress(),
                                    VD->getType()),
                     /*capturedByInit=*/false);
      EmitAutoVarCleanups(Emission);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
}

static void emitLinearClauseFinal(CodeGenFunction &CGF,
                                  const OMPLoopDirective &D) {
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto F : C->finals()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = CGF.EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(CGF);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      CGF.EmitIgnoredExpr(F);
      ++IC;
    }
  }
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

static void emitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters,
                                    ArrayRef<Expr *> PrivateCounters) {
  auto I = PrivateCounters.begin();
  for (auto *E : Counters) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    Address Addr = Address::invalid();
    (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
      // Emit var without initialization.
      auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD);
      CGF.EmitAutoVarCleanups(VarEmission);
      Addr = VarEmission.getAllocatedAddress();
      return Addr;
    });
    (void)LoopScope.addPrivate(VD, [&]() -> Address { return Addr; });
    ++I;
  }
}

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    emitPrivateLoopCounters(CGF, PreCondScope, S.counters(),
                            S.private_counters());
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (auto I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Check that the loop is executed at least once.
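  // Illustrative example: for 'for (int i = 0; i < n; ++i)' this emits
  // essentially 'if (0 < n)', evaluated on temporary private copies of the
  // counters so the original variables stay untouched.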
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

static void
emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
                      CodeGenFunction::OMPPrivateScope &PrivateScope) {
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
        // Emit private VarDecl with copy init.
        CGF.EmitVarDecl(*PrivateVD);
        return CGF.GetAddrOfLocalVar(PrivateVD);
      });
      assert(IsRegistered && "linear var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++CurPrivate;
    }
  }
}

static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D) {
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(false);
  }
}

void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
  // Walk clauses and process safelen/simdlen.
  LoopStack.setParallel();
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D);
}

void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) {
  auto IC = D.counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) {
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
  }
  emitLinearClauseFinal(*this, D);
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    CGF.EmitOMPLinearClauseInit(S);
    bool HasLastprivateClause;
    {
      OMPPrivateScope LoopScope(CGF);
      emitPrivateLoopCounters(CGF, LoopScope, S.counters(),
                              S.private_counters());
      emitPrivateLinearVars(CGF, S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      // Emit final copy of the lastprivate variables at the end of loops.
      if (HasLastprivateClause) {
        CGF.EmitOMPLastprivateClauseFinal(S);
      }
      CGF.EmitOMPReductionClauseFinal(S);
    }
    CGF.EmitOMPSimdFinal(S);
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                          const OMPLoopDirective &S,
                                          OMPPrivateScope &LoopScope,
                                          bool Ordered, Address LB,
                                          Address UB, Address ST,
                                          Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind);

  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed.
  // Each chunk contains chunk_size iterations, except for the last chunk to
  // be distributed, which may have fewer iterations. When no chunk_size is
  // specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the threads
  // in the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
    RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind,
                           IVSize, IVSigned, Ordered, UBVal, Chunk);
  } else {
    RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
                         IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk);
  }

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
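  // For dynamic/guided/runtime and ordered schedules, the condition below is
  // the value returned by __kmpc_dispatch_next(); for chunked static
  // schedules it re-derives 'IV < UB' after clamping UB to the global upper
  // bound.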
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond());
  } else {
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
                                 IL, LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
    LoopStack.setParallel((ScheduleKind == OMPC_SCHEDULE_dynamic ||
                           ScheduleKind == OMPC_SCHEDULE_guided) &&
                          !Ordered);
  } else {
    EmitOMPSimdInit(S);
  }

  SourceLocation Loc = S.getLocStart();
  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                   [&S, LoopExit](CodeGenFunction &CGF) {
                     CGF.EmitOMPLoopBody(S, LoopExit);
                     CGF.EmitStopPoint(&S);
                   },
                   [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
                     if (Ordered) {
                       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
                           CGF, Loc, IVSize, IVSigned);
                     }
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  if (!DynamicOrOrdered)
    RT.emitForStaticFinish(*this, S.getLocEnd());
}

/// \brief Emit a helper variable and return the corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

static std::pair<llvm::Value * /*Chunk*/, OpenMPScheduleClauseKind>
emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S,
                   bool OuterRegion) {
  // Detect the loop schedule kind and chunk.
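  // Illustrative example: 'schedule(dynamic, n*2)' yields
  // OMPC_SCHEDULE_dynamic with Chunk holding the emitted value of 'n*2',
  // converted to the iteration variable's type.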
  auto ScheduleKind = OMPC_SCHEDULE_unknown;
  llvm::Value *Chunk = nullptr;
  if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
    ScheduleKind = C->getScheduleKind();
    if (const auto *Ch = C->getChunkSize()) {
      if (auto *ImpRef = cast_or_null<DeclRefExpr>(C->getHelperChunkSize())) {
        if (OuterRegion) {
          const VarDecl *ImpVar = cast<VarDecl>(ImpRef->getDecl());
          CGF.EmitVarDecl(*ImpVar);
          CGF.EmitStoreThroughLValue(
              CGF.EmitAnyExpr(Ch),
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(ImpVar),
                                 ImpVar->getType()));
        } else {
          Ch = ImpRef;
        }
      }
      if (!C->getHelperChunkSize() || !OuterRegion) {
        Chunk = CGF.EmitScalarExpr(Ch);
        Chunk = CGF.EmitScalarConversion(Chunk, Ch->getType(),
                                         S.getIterationVariable()->getType(),
                                         S.getLocStart());
      }
    }
  }
  return std::make_pair(Chunk, ScheduleKind);
}

bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    EmitOMPLinearClauseInit(S);
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit an implicit barrier to synchronize threads and avoid data
        // races on initialization of firstprivate variables.
bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iteration count variable.
  // If it is not a variable, Sema decided to calculate the iteration count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit the calculation of the iteration count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check the pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    EmitOMPLinearClauseInit(S);
    // Emit 'then' code.
    {
      // Emit the helper variable inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit an implicit barrier to synchronize threads and avoid data
        // races on the initialization of firstprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      emitPrivateLoopCounters(*this, LoopScope, S.counters(),
                              S.private_counters());
      emitPrivateLinearVars(*this, S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      llvm::Value *Chunk;
      OpenMPScheduleClauseKind ScheduleKind;
      auto ScheduleInfo = emitScheduleClause(*this, S, /*OuterRegion=*/false);
      Chunk = ScheduleInfo.first;
      ScheduleKind = ScheduleInfo.second;
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr;
      if (RT.isStaticNonchunked(ScheduleKind,
                                /*Chunked=*/Chunk != nullptr) &&
          !Ordered) {
        if (isOpenMPSimdDirective(S.getDirectiveKind())) {
          EmitOMPSimdInit(S);
        }
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided
        // into chunks that are approximately equal in size, and at most one
        // chunk is distributed to each thread. Note that the size of the
        // chunks is unspecified in this case.
        RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize,
                             IVSigned, Ordered, IL.getAddress(),
                             LB.getAddress(), UB.getAddress(),
                             ST.getAddress());
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // the runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, Ordered,
                            LB.getAddress(), UB.getAddress(), ST.getAddress(),
                            IL.getAddress(), Chunk);
      }
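
      // Rough illustration of the static non-chunked branch above (a runtime
      // protocol sketch, not literal emitted IR; 'Loc', 'Tid' and 'Body' are
      // placeholders):
      //
      //   __kmpc_for_static_init_4(&Loc, Tid, kmp_sch_static, &IsLast, &LB,
      //                            &UB, &Stride, /*Incr=*/1, /*Chunk=*/1);
      //   UB = min(UB, GlobalUB);            // S.getEnsureUpperBound()
      //   for (IV = LB; IV <= UB; ++IV)      // EmitOMPInnerLoop
      //     Body(IV);
      //   __kmpc_for_static_fini(&Loc, Tid); // RT.emitForStaticFinish
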
      EmitOMPReductionClauseFinal(S);
      // Emit the final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    if (isOpenMPSimdDirective(S.getDirectiveKind())) {
      EmitOMPSimdFinal(S);
    }
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
  return HasLastprivateClause;
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                              S.hasCancel());

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}
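
// Note on the barrier logic above: with a 'nowait' clause the trailing
// implicit barrier is normally omitted, but it is still emitted when a
// lastprivate clause is present, e.g. (illustrative source):
//
//   #pragma omp for nowait lastprivate(x)
//   for (...) ...  // barrier kept so the final value of 'x' is visible
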
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitScalarInit(Init, LVal);
  return LVal;
}

OpenMPDirectiveKind
CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  if (CS && CS->size() > 1) {
    bool HasLastprivates = false;
    auto &&CodeGen = [&S, CS, &HasLastprivates](CodeGenFunction &CGF) {
      auto &C = CGF.CGM.getContext();
      auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
      // Emit the helper variable inits.
      LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                    CGF.Builder.getInt32(0));
      auto *GlobalUBVal = CGF.Builder.getInt32(CS->size() - 1);
      LValue UB =
          createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
      LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                    CGF.Builder.getInt32(1));
      LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                    CGF.Builder.getInt32(0));
      // Loop counter.
      LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
      OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
      OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
      // Generate the condition for the loop.
      BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                          OK_Ordinary, S.getLocStart(),
                          /*fpContractable=*/false);
      // Increment for the loop counter.
      UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue,
                        OK_Ordinary, S.getLocStart());
      auto BodyGen = [CS, &S, &IV](CodeGenFunction &CGF) {
        // Iterate through all sections and emit a switch construct:
        // switch (IV) {
        // case 0:
        //   <SectionStmt[0]>;
        //   break;
        // ...
        // case <NumSection> - 1:
        //   <SectionStmt[<NumSection> - 1]>;
        //   break;
        // }
        // .omp.sections.exit:
        auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
        auto *SwitchStmt = CGF.Builder.CreateSwitch(
            CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
            CS->size());
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
        CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
      };

      CodeGenFunction::OMPPrivateScope LoopScope(CGF);
      if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit an implicit barrier to synchronize threads and avoid data
        // races on the initialization of firstprivate variables.
        CGF.CGM.getOpenMPRuntime().emitBarrierCall(
            CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      CGF.EmitOMPPrivateClause(S, LoopScope);
      HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();

      // Emit a static non-chunked loop.
      CGF.CGM.getOpenMPRuntime().emitForStaticInit(
          CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
          /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
          LB.getAddress(), UB.getAddress(), ST.getAddress());
      // UB = min(UB, GlobalUB);
      auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
      auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
          CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
      CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
      // IV = LB;
      CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
      // while (idx <= UB) { BODY; ++idx; }
      CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                           [](CodeGenFunction &) {});
      // Tell the runtime we are done.
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart());
      CGF.EmitOMPReductionClauseFinal(S);

      // Emit the final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivates)
        CGF.EmitOMPLastprivateClauseFinal(
            S, CGF.Builder.CreateIsNotNull(
                   CGF.EmitLoadOfScalar(IL, S.getLocStart())));
    };

    bool HasCancel = false;
    if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
      HasCancel = OSD->hasCancel();
    else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
      HasCancel = OPSD->hasCancel();
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                                HasCancel);
    // Emit a barrier for the lastprivates only if the 'sections' directive
    // has a 'nowait' clause; otherwise the barrier will be generated by the
    // codegen for the directive.
    if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // on the initialization of firstprivate variables.
      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                             OMPD_unknown);
    }
    return OMPD_sections;
  }
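
  // Rough shape of what the multi-section branch above produces for an input
  // like (illustrative only):
  //
  //   #pragma omp sections
  //   {
  //     #pragma omp section
  //       Work0();
  //     #pragma omp section
  //       Work1();
  //   }
  //
  // i.e. a static worksharing loop over the section indices 0..1 whose body
  // is the switch from BodyGen:
  //
  //   for (IV = LB; IV <= UB; ++IV)
  //     switch (IV) { case 0: Work0(); break; case 1: Work1(); break; }
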
  // If only one section is found, there is no need to generate a loop; emit
  // it as a single region.
  bool HasFirstprivates;
  // No need to generate reductions for sections with a single section region;
  // we can use the original shared variables for all operations.
  bool HasReductions = S.hasClausesOfKind<OMPReductionClause>();
  // No need to generate lastprivates for sections with a single section
  // region; we can use the original shared variables for all calculations,
  // with a barrier at the end of the sections.
  bool HasLastprivates = S.hasClausesOfKind<OMPLastprivateClause>();
  auto &&CodeGen = [Stmt, &S, &HasFirstprivates](CodeGenFunction &CGF) {
    CodeGenFunction::OMPPrivateScope SingleScope(CGF);
    HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();

    CGF.EmitStmt(Stmt);
  };
  CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                          llvm::None, llvm::None, llvm::None,
                                          llvm::None);
  // Emit a barrier for the firstprivates, lastprivates or reductions only if
  // the 'sections' directive has a 'nowait' clause; otherwise the barrier
  // will be generated by the codegen for the directive.
  if ((HasFirstprivates || HasLastprivates || HasReductions) &&
      S.getSingleClause<OMPNowaitClause>()) {
    // Emit an implicit barrier to synchronize threads and avoid data races on
    // the initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_unknown,
                                           /*EmitChecks=*/false,
                                           /*ForceSimpleCall=*/true);
  }
  return OMPD_single;
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  OpenMPDirectiveKind EmittedAs = EmitSections(S);
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs);
  }
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
                                              S.hasCancel());
}
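
// Sketch of the 'single' lowering implemented below via emitSingleRegion
// (libomp entry points, heavily simplified; 'DidIt' stands for the helper
// flag used for copyprivate):
//
//   #pragma omp single copyprivate(x)
//   x = Compute();
//
//   if (__kmpc_single(&Loc, Tid)) {   // one thread wins
//     x = Compute();
//     DidIt = 1;
//     __kmpc_end_single(&Loc, Tid);
//   }
//   __kmpc_copyprivate(&Loc, Tid, sizeof(x), &x, CopyFunc, DidIt);
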
void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct. Build a list of copyprivate variables along with
  // helper expressions (<source>, <destination> and
  // <destination> = <source> expressions).
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  LexicalScope Scope(*this, S.getSourceRange());
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  bool HasFirstprivates;
  auto &&CodeGen = [&S, &HasFirstprivates](CodeGenFunction &CGF) {
    CodeGenFunction::OMPPrivateScope SingleScope(CGF);
    HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();

    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                          CopyprivateVars, DestExprs, SrcExprs,
                                          AssignmentOps);
  // Emit an implicit barrier at the end (to avoid a data race on firstprivate
  // initialization, or if no 'nowait' clause was specified), unless a
  // 'copyprivate' clause is present.
  if ((!S.getSingleClause<OMPNowaitClause>() || HasFirstprivates) &&
      CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getLocStart(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitCriticalRegion(
      *this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart());
}
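
// The critical region above maps, roughly, to a named runtime lock
// (illustrative sketch; 'Lock' stands for the per-name lock variable the
// runtime library maintains):
//
//   __kmpc_critical(&Loc, Tid, &Lock);
//   Body();
//   __kmpc_end_critical(&Loc, Tid, &Lock);
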
void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' with a nested 'for' directive.
  LexicalScope Scope(*this, S.getSourceRange());
  (void)emitScheduleClause(*this, S, /*OuterRegion=*/true);
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitOMPWorksharingLoop(S);
    // Emit an implicit barrier at the end of the parallel region. Since this
    // barrier also ends the 'for' directive, emit it as the implicit barrier
    // for this 'for' directive.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getLocStart(), OMPD_parallel, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' with a nested 'for simd' directive.
  LexicalScope Scope(*this, S.getSourceRange());
  (void)emitScheduleClause(*this, S, /*OuterRegion=*/true);
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitOMPWorksharingLoop(S);
    // Emit an implicit barrier at the end of the parallel region. Since this
    // barrier also ends the 'for' directive, emit it as the implicit barrier
    // for this 'for' directive.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getLocStart(), OMPD_parallel, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' with a nested 'sections' directive.
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    (void)CGF.EmitSections(S);
    // Emit an implicit barrier at the end of the parallel region.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getLocStart(), OMPD_parallel, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
}
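
// For the combined directives above, the worksharing loop or the sections
// region is emitted inside the outlined 'parallel' function, with the
// region-ending barrier emitted explicitly in that function. Illustrative
// shape for '#pragma omp parallel for' ('OutlinedFn' is a placeholder):
//
//   __kmpc_fork_call(&Loc, NumCaptured, OutlinedFn, ...captured vars...);
//   // OutlinedFn: run the worksharing loop, then __kmpc_barrier(&Loc, Tid);
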
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit the outlined function for the task construct.
  LexicalScope Scope(*this, S.getSourceRange());
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >= 0 for untied tasks).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get the list of private variables.
  llvm::SmallVector<const Expr *, 8> PrivateVars;
  llvm::SmallVector<const Expr *, 8> PrivateCopies;
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        PrivateVars.push_back(*IRef);
        PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get the list of firstprivate variables.
  llvm::SmallVector<const Expr *, 8> FirstprivateVars;
  llvm::SmallVector<const Expr *, 8> FirstprivateCopies;
  llvm::SmallVector<const Expr *, 8> FirstprivateInits;
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        FirstprivateVars.push_back(*IRef);
        FirstprivateCopies.push_back(IInit);
        FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef, ++IElemInitRef;
    }
  }
  // Build the list of dependences.
  llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8>
      Dependences;
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    for (auto *IRef : C->varlists()) {
      Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
    }
  }
  auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars](
      CodeGenFunction &CGF) {
    // Set proper addresses for the generated private copies.
    auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
    OMPPrivateScope Scope(CGF);
    if (!PrivateVars.empty() || !FirstprivateVars.empty()) {
      auto *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (auto *E : PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.EmitRuntimeCall(CopyFn, CallArgs);
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    (void)Scope.Privatize();
    if (*PartId) {
      // TODO: emit code for untied tasks.
    }
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, OMPD_task, CodeGen);
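
  // Rough picture of what emitTaskCall below produces from the pieces
  // collected above (libomp entry points, heavily simplified):
  //
  //   Task = __kmpc_omp_task_alloc(&Loc, Tid, Flags /*tied/final*/,
  //                                TaskSize, SharedsSize, &TaskEntry);
  //   ...copy the shareds; fill the privates via the generated copy function
  //   (the 'CopyFn' loaded from param 3 in the lambda above)...
  //   __kmpc_omp_task(&Loc, Tid, Task); // or the deferred/if-clause variant
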
  // Check if we should emit a tied or untied task.
  bool Tied = !S.getSingleClause<OMPUntiedClause>();
  // Check if the task is final.
  llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Final.setInt(CondConstant);
    else
      Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Final.setInt(/*IntVal=*/false);
  }
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitTaskCall(
      *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy,
      CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars,
      FirstprivateCopies, FirstprivateInits, Dependences);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
  Fn->addFnAttr(llvm::Attribute::NoInline);
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) {
    if (C) {
      auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
      CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
    } else {
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    }
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
}
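
// Sketch of the 'ordered' lowering above for the non-simd case (illustrative
// only):
//
//   __kmpc_ordered(&Loc, Tid);
//   Body();
//   __kmpc_end_ordered(&Loc, Tid);
//
// With the 'simd' clause the body is instead outlined (and marked noinline
// above) and called directly, so no runtime serialization is introduced.
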
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
                                        Loc)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                              DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  } else {
    CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? llvm::SequentiallyConsistent
                                             : llvm::Monotonic,
                        LVal.isVolatile(), /*IsInit=*/false);
  }
}

static void emitSimpleStore(CodeGenFunction &CGF, LValue LVal, RValue RVal,
                            QualType RValTy, SourceLocation Loc) {
  switch (CGF.getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    CGF.EmitStoreThroughLValue(
        RValue::get(
            convertToScalarValue(CGF, RVal, RValTy, LVal.getType(), Loc)),
        LVal);
    break;
  case TEK_Complex:
    CGF.EmitStoreOfComplex(
        convertToComplexValue(CGF, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}
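
// Sketch of the 'atomic read' case handled next (illustrative IR only, with
// alignment and exact types elided):
//
//   #pragma omp atomic read
//   v = x;
//
//   %1 = load atomic i32, i32* %x monotonic ; seq_cst ordering (plus a
//   store i32 %1, i32* %v                   ; __kmpc_flush) with 'seq_cst'
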
static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(XLValue, Loc,
                                        IsSeqCst ? llvm::SequentiallyConsistent
                                                 : llvm::Monotonic,
                                        XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  emitSimpleStore(CGF, VLValue, Res, X->getType().getNonReferenceType(), Loc);
}

static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
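
// emitOMPAtomicRMW below selects a single atomicrmw instruction when the
// update expression matches one, e.g. (illustrative):
//
//   #pragma omp atomic update
//   x += n;
//
//   %old = atomicrmw add i32* %x, i32 %n monotonic
//
// Updates that do not fit (e.g. 'x = n - x', where 'x' is not on the LHS of
// the operation, or non-integer types) fall back to the compare-and-swap
// path.
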
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomics are allowed for the given
  // type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr;     -> xrval binop expr;
  // x++, ++x           -> xrval + 1;
  // x--, --x           -> xrval - 1;
  // x = x binop expr;  -> xrval binop expr;
  // x = expr Op x;     -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform the compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}
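
// When no single atomicrmw applies, EmitAtomicUpdate above degrades to a
// compare-exchange loop, roughly (pseudo-IR sketch):
//
//   %old = load atomic %x
// loop:
//   %new  = <CommonGen applied to %old>
//   %pair = cmpxchg %x, %old, %new
//   %old  = extractvalue %pair, 0
//   br i1 (extractvalue %pair, 1), label %done, label %loop
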
static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr;     -> xrval binop expr;
  // x++, ++x           -> xrval + 1;
  // x--, --x           -> xrval - 1;
  // x = x binop expr;  -> xrval binop expr;
  // x = expr Op x;     -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr;     -> xrval binop expr;
    // x++, ++x           -> xrval + 1;
    // x--, --x           -> xrval - 1;
    // x = x binop expr;  -> xrval binop expr;
    // x = expr Op x;     -> expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // An 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use the old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide the new value, so evaluate it using
        // the old value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform an atomicrmw xchg, otherwise a simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // An 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit the post-update store to 'v' of the old/new 'x' value.
  emitSimpleStore(CGF, VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
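
// Clause-to-form mapping used by the dispatch above (illustrative):
//   read:    v = x;
//   write:   x = expr;
//   update:  x binop= expr;         (also the default when no clause is given)
//   capture: v = x binop= expr;  or  { v = x; x binop= expr; }  etc.
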
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>() != nullptr;
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find the first clause (skip the seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
    enterFullExpression(EWC);
  }
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const auto *C : Compound->body()) {
      if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
        enterFullExpression(EWC);
      }
    }
  }

  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) {
    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
                      S.getV(), S.getExpr(), S.getUpdateExpr(),
                      S.isXLHSInRHSPart(), S.getLocStart());
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());

  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  GenerateOpenMPCapturedVars(CS, CapturedVars, /*UseOnlyReferences=*/true);

  // Emit the target region as a standalone region.
  auto &&CodeGen = [&CS](CodeGenFunction &CGF) {
    CGF.EmitStmt(CS.getCapturedStmt());
  };

  // Obtain the target region outlined function.
  llvm::Value *Fn =
      CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, CodeGen);

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>()) {
    IfCond = C->getCondition();
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
    Device = C->getDevice();
  }

  CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, IfCond, Device,
                                        CapturedVars);
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
}

void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for);
  return BreakContinueStack.back().BreakBlock;
}

// Generate the instructions for the '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  // For now, just emit the code for the body of the construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_data,
      [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
}
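
// Rough shape of the sequence produced by emitTargetCall above (offload
// runtime names, heavily simplified; argument marshalling is omitted and
// 'TargetFallbackFn' is a placeholder for the outlined host version):
//
//   if (__tgt_target(DeviceId, HostRegionId, NumArgs, ArgsBase, Args,
//                    ArgSizes, ArgTypes) != 0)
//     TargetFallbackFn(...); // outlined region executed on the host
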