1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit OpenMP nodes as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGOpenMPRuntime.h" 15 #include "CodeGenFunction.h" 16 #include "CodeGenModule.h" 17 #include "TargetInfo.h" 18 #include "clang/AST/Stmt.h" 19 #include "clang/AST/StmtOpenMP.h" 20 using namespace clang; 21 using namespace CodeGen; 22 23 //===----------------------------------------------------------------------===// 24 // OpenMP Directive Emission 25 //===----------------------------------------------------------------------===// 26 void CodeGenFunction::EmitOMPAggregateAssign( 27 llvm::Value *DestAddr, llvm::Value *SrcAddr, QualType OriginalType, 28 const llvm::function_ref<void(llvm::Value *, llvm::Value *)> &CopyGen) { 29 // Perform element-by-element initialization. 30 QualType ElementTy; 31 auto SrcBegin = SrcAddr; 32 auto DestBegin = DestAddr; 33 auto ArrayTy = OriginalType->getAsArrayTypeUnsafe(); 34 auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin); 35 // Cast from pointer to array type to pointer to single element. 36 SrcBegin = Builder.CreatePointerBitCastOrAddrSpaceCast(SrcBegin, 37 DestBegin->getType()); 38 auto DestEnd = Builder.CreateGEP(DestBegin, NumElements); 39 // The basic structure here is a while-do loop. 40 auto BodyBB = createBasicBlock("omp.arraycpy.body"); 41 auto DoneBB = createBasicBlock("omp.arraycpy.done"); 42 auto IsEmpty = 43 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); 44 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 45 46 // Enter the loop body, making that address the current address. 47 auto EntryBB = Builder.GetInsertBlock(); 48 EmitBlock(BodyBB); 49 auto SrcElementCurrent = 50 Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 51 SrcElementCurrent->addIncoming(SrcBegin, EntryBB); 52 auto DestElementCurrent = Builder.CreatePHI(DestBegin->getType(), 2, 53 "omp.arraycpy.destElementPast"); 54 DestElementCurrent->addIncoming(DestBegin, EntryBB); 55 56 // Emit copy. 57 CopyGen(DestElementCurrent, SrcElementCurrent); 58 59 // Shift the address forward by one element. 60 auto DestElementNext = Builder.CreateConstGEP1_32( 61 DestElementCurrent, /*Idx0=*/1, "omp.arraycpy.dest.element"); 62 auto SrcElementNext = Builder.CreateConstGEP1_32( 63 SrcElementCurrent, /*Idx0=*/1, "omp.arraycpy.src.element"); 64 // Check whether we've reached the end. 65 auto Done = 66 Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 67 Builder.CreateCondBr(Done, DoneBB, BodyBB); 68 DestElementCurrent->addIncoming(DestElementNext, Builder.GetInsertBlock()); 69 SrcElementCurrent->addIncoming(SrcElementNext, Builder.GetInsertBlock()); 70 71 // Done. 72 EmitBlock(DoneBB, /*IsFinished=*/true); 73 } 74 75 void CodeGenFunction::EmitOMPCopy(CodeGenFunction &CGF, 76 QualType OriginalType, llvm::Value *DestAddr, 77 llvm::Value *SrcAddr, const VarDecl *DestVD, 78 const VarDecl *SrcVD, const Expr *Copy) { 79 if (OriginalType->isArrayType()) { 80 auto *BO = dyn_cast<BinaryOperator>(Copy); 81 if (BO && BO->getOpcode() == BO_Assign) { 82 // Perform simple memcpy for simple copying. 83 CGF.EmitAggregateAssign(DestAddr, SrcAddr, OriginalType); 84 } else { 85 // For arrays with complex element types perform element by element 86 // copying. 87 CGF.EmitOMPAggregateAssign( 88 DestAddr, SrcAddr, OriginalType, 89 [&CGF, Copy, SrcVD, DestVD](llvm::Value *DestElement, 90 llvm::Value *SrcElement) { 91 // Working with the single array element, so have to remap 92 // destination and source variables to corresponding array 93 // elements. 94 CodeGenFunction::OMPPrivateScope Remap(CGF); 95 Remap.addPrivate(DestVD, [DestElement]() -> llvm::Value *{ 96 return DestElement; 97 }); 98 Remap.addPrivate( 99 SrcVD, [SrcElement]() -> llvm::Value *{ return SrcElement; }); 100 (void)Remap.Privatize(); 101 CGF.EmitIgnoredExpr(Copy); 102 }); 103 } 104 } else { 105 // Remap pseudo source variable to private copy. 106 CodeGenFunction::OMPPrivateScope Remap(CGF); 107 Remap.addPrivate(SrcVD, [SrcAddr]() -> llvm::Value *{ return SrcAddr; }); 108 Remap.addPrivate(DestVD, [DestAddr]() -> llvm::Value *{ return DestAddr; }); 109 (void)Remap.Privatize(); 110 // Emit copying of the whole variable. 111 CGF.EmitIgnoredExpr(Copy); 112 } 113 } 114 115 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, 116 OMPPrivateScope &PrivateScope) { 117 llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; 118 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { 119 auto IRef = C->varlist_begin(); 120 auto InitsRef = C->inits().begin(); 121 for (auto IInit : C->private_copies()) { 122 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 123 if (EmittedAsFirstprivate.count(OrigVD) == 0) { 124 EmittedAsFirstprivate.insert(OrigVD); 125 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 126 auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); 127 bool IsRegistered; 128 DeclRefExpr DRE( 129 const_cast<VarDecl *>(OrigVD), 130 /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( 131 OrigVD) != nullptr, 132 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 133 auto *OriginalAddr = EmitLValue(&DRE).getAddress(); 134 QualType Type = OrigVD->getType(); 135 if (Type->isArrayType()) { 136 // Emit VarDecl with copy init for arrays. 137 // Get the address of the original variable captured in current 138 // captured region. 139 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{ 140 auto Emission = EmitAutoVarAlloca(*VD); 141 auto *Init = VD->getInit(); 142 if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) { 143 // Perform simple memcpy. 144 EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr, 145 Type); 146 } else { 147 EmitOMPAggregateAssign( 148 Emission.getAllocatedAddress(), OriginalAddr, Type, 149 [this, VDInit, Init](llvm::Value *DestElement, 150 llvm::Value *SrcElement) { 151 // Clean up any temporaries needed by the initialization. 152 RunCleanupsScope InitScope(*this); 153 // Emit initialization for single element. 154 LocalDeclMap[VDInit] = SrcElement; 155 EmitAnyExprToMem(Init, DestElement, 156 Init->getType().getQualifiers(), 157 /*IsInitializer*/ false); 158 LocalDeclMap.erase(VDInit); 159 }); 160 } 161 EmitAutoVarCleanups(Emission); 162 return Emission.getAllocatedAddress(); 163 }); 164 } else { 165 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{ 166 // Emit private VarDecl with copy init. 167 // Remap temp VDInit variable to the address of the original 168 // variable 169 // (for proper handling of captured global variables). 170 LocalDeclMap[VDInit] = OriginalAddr; 171 EmitDecl(*VD); 172 LocalDeclMap.erase(VDInit); 173 return GetAddrOfLocalVar(VD); 174 }); 175 } 176 assert(IsRegistered && 177 "firstprivate var already registered as private"); 178 // Silence the warning about unused variable. 179 (void)IsRegistered; 180 } 181 ++IRef, ++InitsRef; 182 } 183 } 184 return !EmittedAsFirstprivate.empty(); 185 } 186 187 void CodeGenFunction::EmitOMPPrivateClause( 188 const OMPExecutableDirective &D, 189 CodeGenFunction::OMPPrivateScope &PrivateScope) { 190 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 191 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) { 192 auto IRef = C->varlist_begin(); 193 for (auto IInit : C->private_copies()) { 194 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 195 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 196 auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 197 bool IsRegistered = 198 PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{ 199 // Emit private VarDecl with copy init. 200 EmitDecl(*VD); 201 return GetAddrOfLocalVar(VD); 202 }); 203 assert(IsRegistered && "private var already registered as private"); 204 // Silence the warning about unused variable. 205 (void)IsRegistered; 206 } 207 ++IRef; 208 } 209 } 210 } 211 212 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { 213 // threadprivate_var1 = master_threadprivate_var1; 214 // operator=(threadprivate_var2, master_threadprivate_var2); 215 // ... 216 // __kmpc_barrier(&loc, global_tid); 217 llvm::DenseSet<const VarDecl *> CopiedVars; 218 llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; 219 for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) { 220 auto IRef = C->varlist_begin(); 221 auto ISrcRef = C->source_exprs().begin(); 222 auto IDestRef = C->destination_exprs().begin(); 223 for (auto *AssignOp : C->assignment_ops()) { 224 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 225 QualType Type = VD->getType(); 226 if (CopiedVars.insert(VD->getCanonicalDecl()).second) { 227 228 // Get the address of the master variable. If we are emitting code with 229 // TLS support, the address is passed from the master as field in the 230 // captured declaration. 231 llvm::Value *MasterAddr; 232 if (getLangOpts().OpenMPUseTLS && 233 getContext().getTargetInfo().isTLSSupported()) { 234 assert(CapturedStmtInfo->lookup(VD) && 235 "Copyin threadprivates should have been captured!"); 236 DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(), 237 VK_LValue, (*IRef)->getExprLoc()); 238 MasterAddr = EmitLValue(&DRE).getAddress(); 239 } else { 240 MasterAddr = VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) 241 : CGM.GetAddrOfGlobal(VD); 242 } 243 // Get the address of the threadprivate variable. 244 auto *PrivateAddr = EmitLValue(*IRef).getAddress(); 245 if (CopiedVars.size() == 1) { 246 // At first check if current thread is a master thread. If it is, no 247 // need to copy data. 248 CopyBegin = createBasicBlock("copyin.not.master"); 249 CopyEnd = createBasicBlock("copyin.not.master.end"); 250 Builder.CreateCondBr( 251 Builder.CreateICmpNE( 252 Builder.CreatePtrToInt(MasterAddr, CGM.IntPtrTy), 253 Builder.CreatePtrToInt(PrivateAddr, CGM.IntPtrTy)), 254 CopyBegin, CopyEnd); 255 EmitBlock(CopyBegin); 256 } 257 auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 258 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 259 EmitOMPCopy(*this, Type, PrivateAddr, MasterAddr, DestVD, SrcVD, 260 AssignOp); 261 } 262 ++IRef; 263 ++ISrcRef; 264 ++IDestRef; 265 } 266 } 267 if (CopyEnd) { 268 // Exit out of copying procedure for non-master thread. 269 EmitBlock(CopyEnd, /*IsFinished=*/true); 270 return true; 271 } 272 return false; 273 } 274 275 bool CodeGenFunction::EmitOMPLastprivateClauseInit( 276 const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { 277 bool HasAtLeastOneLastprivate = false; 278 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; 279 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 280 HasAtLeastOneLastprivate = true; 281 auto IRef = C->varlist_begin(); 282 auto IDestRef = C->destination_exprs().begin(); 283 for (auto *IInit : C->private_copies()) { 284 // Keep the address of the original variable for future update at the end 285 // of the loop. 286 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 287 if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { 288 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 289 PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> llvm::Value *{ 290 DeclRefExpr DRE( 291 const_cast<VarDecl *>(OrigVD), 292 /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( 293 OrigVD) != nullptr, 294 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 295 return EmitLValue(&DRE).getAddress(); 296 }); 297 // Check if the variable is also a firstprivate: in this case IInit is 298 // not generated. Initialization of this variable will happen in codegen 299 // for 'firstprivate' clause. 300 if (IInit) { 301 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 302 bool IsRegistered = 303 PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{ 304 // Emit private VarDecl with copy init. 305 EmitDecl(*VD); 306 return GetAddrOfLocalVar(VD); 307 }); 308 assert(IsRegistered && 309 "lastprivate var already registered as private"); 310 (void)IsRegistered; 311 } 312 } 313 ++IRef, ++IDestRef; 314 } 315 } 316 return HasAtLeastOneLastprivate; 317 } 318 319 void CodeGenFunction::EmitOMPLastprivateClauseFinal( 320 const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) { 321 // Emit following code: 322 // if (<IsLastIterCond>) { 323 // orig_var1 = private_orig_var1; 324 // ... 325 // orig_varn = private_orig_varn; 326 // } 327 llvm::BasicBlock *ThenBB = nullptr; 328 llvm::BasicBlock *DoneBB = nullptr; 329 if (IsLastIterCond) { 330 ThenBB = createBasicBlock(".omp.lastprivate.then"); 331 DoneBB = createBasicBlock(".omp.lastprivate.done"); 332 Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB); 333 EmitBlock(ThenBB); 334 } 335 llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates; 336 const Expr *LastIterVal = nullptr; 337 const Expr *IVExpr = nullptr; 338 const Expr *IncExpr = nullptr; 339 if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) { 340 if (isOpenMPWorksharingDirective(D.getDirectiveKind())) { 341 LastIterVal = cast<VarDecl>(cast<DeclRefExpr>( 342 LoopDirective->getUpperBoundVariable()) 343 ->getDecl()) 344 ->getAnyInitializer(); 345 IVExpr = LoopDirective->getIterationVariable(); 346 IncExpr = LoopDirective->getInc(); 347 auto IUpdate = LoopDirective->updates().begin(); 348 for (auto *E : LoopDirective->counters()) { 349 auto *D = cast<DeclRefExpr>(E)->getDecl()->getCanonicalDecl(); 350 LoopCountersAndUpdates[D] = *IUpdate; 351 ++IUpdate; 352 } 353 } 354 } 355 { 356 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; 357 bool FirstLCV = true; 358 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 359 auto IRef = C->varlist_begin(); 360 auto ISrcRef = C->source_exprs().begin(); 361 auto IDestRef = C->destination_exprs().begin(); 362 for (auto *AssignOp : C->assignment_ops()) { 363 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 364 QualType Type = PrivateVD->getType(); 365 auto *CanonicalVD = PrivateVD->getCanonicalDecl(); 366 if (AlreadyEmittedVars.insert(CanonicalVD).second) { 367 // If lastprivate variable is a loop control variable for loop-based 368 // directive, update its value before copyin back to original 369 // variable. 370 if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) { 371 if (FirstLCV && LastIterVal) { 372 EmitAnyExprToMem(LastIterVal, EmitLValue(IVExpr).getAddress(), 373 IVExpr->getType().getQualifiers(), 374 /*IsInitializer=*/false); 375 EmitIgnoredExpr(IncExpr); 376 FirstLCV = false; 377 } 378 EmitIgnoredExpr(UpExpr); 379 } 380 auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 381 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 382 // Get the address of the original variable. 383 auto *OriginalAddr = GetAddrOfLocalVar(DestVD); 384 // Get the address of the private variable. 385 auto *PrivateAddr = GetAddrOfLocalVar(PrivateVD); 386 EmitOMPCopy(*this, Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, 387 AssignOp); 388 } 389 ++IRef; 390 ++ISrcRef; 391 ++IDestRef; 392 } 393 } 394 } 395 if (IsLastIterCond) { 396 EmitBlock(DoneBB, /*IsFinished=*/true); 397 } 398 } 399 400 void CodeGenFunction::EmitOMPReductionClauseInit( 401 const OMPExecutableDirective &D, 402 CodeGenFunction::OMPPrivateScope &PrivateScope) { 403 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 404 auto ILHS = C->lhs_exprs().begin(); 405 auto IRHS = C->rhs_exprs().begin(); 406 for (auto IRef : C->varlists()) { 407 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); 408 auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 409 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 410 // Store the address of the original variable associated with the LHS 411 // implicit variable. 412 PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> llvm::Value *{ 413 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 414 CapturedStmtInfo->lookup(OrigVD) != nullptr, 415 IRef->getType(), VK_LValue, IRef->getExprLoc()); 416 return EmitLValue(&DRE).getAddress(); 417 }); 418 // Emit reduction copy. 419 bool IsRegistered = 420 PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> llvm::Value *{ 421 // Emit private VarDecl with reduction init. 422 EmitDecl(*PrivateVD); 423 return GetAddrOfLocalVar(PrivateVD); 424 }); 425 assert(IsRegistered && "private var already registered as private"); 426 // Silence the warning about unused variable. 427 (void)IsRegistered; 428 ++ILHS, ++IRHS; 429 } 430 } 431 } 432 433 void CodeGenFunction::EmitOMPReductionClauseFinal( 434 const OMPExecutableDirective &D) { 435 llvm::SmallVector<const Expr *, 8> LHSExprs; 436 llvm::SmallVector<const Expr *, 8> RHSExprs; 437 llvm::SmallVector<const Expr *, 8> ReductionOps; 438 bool HasAtLeastOneReduction = false; 439 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 440 HasAtLeastOneReduction = true; 441 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 442 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 443 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 444 } 445 if (HasAtLeastOneReduction) { 446 // Emit nowait reduction if nowait clause is present or directive is a 447 // parallel directive (it always has implicit barrier). 448 CGM.getOpenMPRuntime().emitReduction( 449 *this, D.getLocEnd(), LHSExprs, RHSExprs, ReductionOps, 450 D.getSingleClause<OMPNowaitClause>() || 451 isOpenMPParallelDirective(D.getDirectiveKind()) || 452 D.getDirectiveKind() == OMPD_simd, 453 D.getDirectiveKind() == OMPD_simd); 454 } 455 } 456 457 static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, 458 const OMPExecutableDirective &S, 459 OpenMPDirectiveKind InnermostKind, 460 const RegionCodeGenTy &CodeGen) { 461 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 462 auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS); 463 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( 464 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 465 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { 466 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 467 auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 468 /*IgnoreResultAssign*/ true); 469 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( 470 CGF, NumThreads, NumThreadsClause->getLocStart()); 471 } 472 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { 473 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 474 CGF.CGM.getOpenMPRuntime().emitProcBindClause( 475 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart()); 476 } 477 const Expr *IfCond = nullptr; 478 if (const auto *C = S.getSingleClause<OMPIfClause>()) { 479 IfCond = C->getCondition(); 480 } 481 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, 482 CapturedStruct, IfCond); 483 } 484 485 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { 486 LexicalScope Scope(*this, S.getSourceRange()); 487 // Emit parallel region as a standalone region. 488 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 489 OMPPrivateScope PrivateScope(CGF); 490 bool Copyins = CGF.EmitOMPCopyinClause(S); 491 bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope); 492 if (Copyins || Firstprivates) { 493 // Emit implicit barrier to synchronize threads and avoid data races on 494 // initialization of firstprivate variables or propagation master's thread 495 // values of threadprivate variables to local instances of that variables 496 // of all other implicit threads. 497 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), 498 OMPD_unknown); 499 } 500 CGF.EmitOMPPrivateClause(S, PrivateScope); 501 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 502 (void)PrivateScope.Privatize(); 503 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 504 CGF.EmitOMPReductionClauseFinal(S); 505 // Emit implicit barrier at the end of the 'parallel' directive. 506 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), 507 OMPD_unknown); 508 }; 509 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen); 510 } 511 512 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, 513 JumpDest LoopExit) { 514 RunCleanupsScope BodyScope(*this); 515 // Update counters values on current iteration. 516 for (auto I : D.updates()) { 517 EmitIgnoredExpr(I); 518 } 519 // Update the linear variables. 520 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 521 for (auto U : C->updates()) { 522 EmitIgnoredExpr(U); 523 } 524 } 525 526 // On a continue in the body, jump to the end. 527 auto Continue = getJumpDestInCurrentScope("omp.body.continue"); 528 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 529 // Emit loop body. 530 EmitStmt(D.getBody()); 531 // The end (updates/cleanups). 532 EmitBlock(Continue.getBlock()); 533 BreakContinueStack.pop_back(); 534 // TODO: Update lastprivates if the SeparateIter flag is true. 535 // This will be implemented in a follow-up OMPLastprivateClause patch, but 536 // result should be still correct without it, as we do not make these 537 // variables private yet. 538 } 539 540 void CodeGenFunction::EmitOMPInnerLoop( 541 const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, 542 const Expr *IncExpr, 543 const llvm::function_ref<void(CodeGenFunction &)> &BodyGen, 544 const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) { 545 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); 546 547 // Start the loop with a block that tests the condition. 548 auto CondBlock = createBasicBlock("omp.inner.for.cond"); 549 EmitBlock(CondBlock); 550 LoopStack.push(CondBlock); 551 552 // If there are any cleanups between here and the loop-exit scope, 553 // create a block to stage a loop exit along. 554 auto ExitBlock = LoopExit.getBlock(); 555 if (RequiresCleanup) 556 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); 557 558 auto LoopBody = createBasicBlock("omp.inner.for.body"); 559 560 // Emit condition. 561 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); 562 if (ExitBlock != LoopExit.getBlock()) { 563 EmitBlock(ExitBlock); 564 EmitBranchThroughCleanup(LoopExit); 565 } 566 567 EmitBlock(LoopBody); 568 incrementProfileCounter(&S); 569 570 // Create a block for the increment. 571 auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); 572 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 573 574 BodyGen(*this); 575 576 // Emit "IV = IV + 1" and a back-edge to the condition block. 577 EmitBlock(Continue.getBlock()); 578 EmitIgnoredExpr(IncExpr); 579 PostIncGen(*this); 580 BreakContinueStack.pop_back(); 581 EmitBranch(CondBlock); 582 LoopStack.pop(); 583 // Emit the fall-through block. 584 EmitBlock(LoopExit.getBlock()); 585 } 586 587 void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { 588 // Emit inits for the linear variables. 589 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 590 for (auto Init : C->inits()) { 591 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); 592 auto *OrigVD = cast<VarDecl>( 593 cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())->getDecl()); 594 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 595 CapturedStmtInfo->lookup(OrigVD) != nullptr, 596 VD->getInit()->getType(), VK_LValue, 597 VD->getInit()->getExprLoc()); 598 AutoVarEmission Emission = EmitAutoVarAlloca(*VD); 599 EmitExprAsInit(&DRE, VD, 600 MakeAddrLValue(Emission.getAllocatedAddress(), 601 VD->getType(), Emission.Alignment), 602 /*capturedByInit=*/false); 603 EmitAutoVarCleanups(Emission); 604 } 605 // Emit the linear steps for the linear clauses. 606 // If a step is not constant, it is pre-calculated before the loop. 607 if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep())) 608 if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) { 609 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); 610 // Emit calculation of the linear step. 611 EmitIgnoredExpr(CS); 612 } 613 } 614 } 615 616 static void emitLinearClauseFinal(CodeGenFunction &CGF, 617 const OMPLoopDirective &D) { 618 // Emit the final values of the linear variables. 619 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 620 auto IC = C->varlist_begin(); 621 for (auto F : C->finals()) { 622 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); 623 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 624 CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, 625 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); 626 auto *OrigAddr = CGF.EmitLValue(&DRE).getAddress(); 627 CodeGenFunction::OMPPrivateScope VarScope(CGF); 628 VarScope.addPrivate(OrigVD, 629 [OrigAddr]() -> llvm::Value *{ return OrigAddr; }); 630 (void)VarScope.Privatize(); 631 CGF.EmitIgnoredExpr(F); 632 ++IC; 633 } 634 } 635 } 636 637 static void emitAlignedClause(CodeGenFunction &CGF, 638 const OMPExecutableDirective &D) { 639 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { 640 unsigned ClauseAlignment = 0; 641 if (auto AlignmentExpr = Clause->getAlignment()) { 642 auto AlignmentCI = 643 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); 644 ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue()); 645 } 646 for (auto E : Clause->varlists()) { 647 unsigned Alignment = ClauseAlignment; 648 if (Alignment == 0) { 649 // OpenMP [2.8.1, Description] 650 // If no optional parameter is specified, implementation-defined default 651 // alignments for SIMD instructions on the target platforms are assumed. 652 Alignment = 653 CGF.getContext() 654 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( 655 E->getType()->getPointeeType())) 656 .getQuantity(); 657 } 658 assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) && 659 "alignment is not power of 2"); 660 if (Alignment != 0) { 661 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 662 CGF.EmitAlignmentAssumption(PtrValue, Alignment); 663 } 664 } 665 } 666 } 667 668 static void emitPrivateLoopCounters(CodeGenFunction &CGF, 669 CodeGenFunction::OMPPrivateScope &LoopScope, 670 ArrayRef<Expr *> Counters, 671 ArrayRef<Expr *> PrivateCounters) { 672 auto I = PrivateCounters.begin(); 673 for (auto *E : Counters) { 674 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 675 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 676 llvm::Value *Addr; 677 (void)LoopScope.addPrivate(PrivateVD, [&]() -> llvm::Value * { 678 // Emit var without initialization. 679 auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD); 680 CGF.EmitAutoVarCleanups(VarEmission); 681 Addr = VarEmission.getAllocatedAddress(); 682 return Addr; 683 }); 684 (void)LoopScope.addPrivate(VD, [&]() -> llvm::Value * { return Addr; }); 685 ++I; 686 } 687 } 688 689 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 690 const Expr *Cond, llvm::BasicBlock *TrueBlock, 691 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 692 { 693 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 694 emitPrivateLoopCounters(CGF, PreCondScope, S.counters(), 695 S.private_counters()); 696 (void)PreCondScope.Privatize(); 697 // Get initial values of real counters. 698 for (auto I : S.inits()) { 699 CGF.EmitIgnoredExpr(I); 700 } 701 } 702 // Check that loop is executed at least one time. 703 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); 704 } 705 706 static void 707 emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D, 708 CodeGenFunction::OMPPrivateScope &PrivateScope) { 709 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 710 auto CurPrivate = C->privates().begin(); 711 for (auto *E : C->varlists()) { 712 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 713 auto *PrivateVD = 714 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); 715 bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> llvm::Value * { 716 // Emit private VarDecl with copy init. 717 CGF.EmitVarDecl(*PrivateVD); 718 return CGF.GetAddrOfLocalVar(PrivateVD); 719 }); 720 assert(IsRegistered && "linear var already registered as private"); 721 // Silence the warning about unused variable. 722 (void)IsRegistered; 723 ++CurPrivate; 724 } 725 } 726 } 727 728 static void emitSimdlenSafelenClause(CodeGenFunction &CGF, 729 const OMPExecutableDirective &D) { 730 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { 731 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 732 /*ignoreResult=*/true); 733 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 734 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 735 // In presence of finite 'safelen', it may be unsafe to mark all 736 // the memory instructions parallel, because loop-carried 737 // dependences of 'safelen' iterations are possible. 738 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); 739 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { 740 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), 741 /*ignoreResult=*/true); 742 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 743 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 744 // In presence of finite 'safelen', it may be unsafe to mark all 745 // the memory instructions parallel, because loop-carried 746 // dependences of 'safelen' iterations are possible. 747 CGF.LoopStack.setParallel(false); 748 } 749 } 750 751 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) { 752 // Walk clauses and process safelen/lastprivate. 753 LoopStack.setParallel(); 754 LoopStack.setVectorizeEnable(true); 755 emitSimdlenSafelenClause(*this, D); 756 } 757 758 void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) { 759 auto IC = D.counters().begin(); 760 for (auto F : D.finals()) { 761 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); 762 if (LocalDeclMap.lookup(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) { 763 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 764 CapturedStmtInfo->lookup(OrigVD) != nullptr, 765 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); 766 auto *OrigAddr = EmitLValue(&DRE).getAddress(); 767 OMPPrivateScope VarScope(*this); 768 VarScope.addPrivate(OrigVD, 769 [OrigAddr]() -> llvm::Value *{ return OrigAddr; }); 770 (void)VarScope.Privatize(); 771 EmitIgnoredExpr(F); 772 } 773 ++IC; 774 } 775 emitLinearClauseFinal(*this, D); 776 } 777 778 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 779 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 780 // if (PreCond) { 781 // for (IV in 0..LastIteration) BODY; 782 // <Final counter/linear vars updates>; 783 // } 784 // 785 786 // Emit: if (PreCond) - begin. 787 // If the condition constant folds and can be elided, avoid emitting the 788 // whole loop. 789 bool CondConstant; 790 llvm::BasicBlock *ContBlock = nullptr; 791 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 792 if (!CondConstant) 793 return; 794 } else { 795 auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); 796 ContBlock = CGF.createBasicBlock("simd.if.end"); 797 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 798 CGF.getProfileCount(&S)); 799 CGF.EmitBlock(ThenBlock); 800 CGF.incrementProfileCounter(&S); 801 } 802 803 // Emit the loop iteration variable. 804 const Expr *IVExpr = S.getIterationVariable(); 805 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 806 CGF.EmitVarDecl(*IVDecl); 807 CGF.EmitIgnoredExpr(S.getInit()); 808 809 // Emit the iterations count variable. 810 // If it is not a variable, Sema decided to calculate iterations count on 811 // each iteration (e.g., it is foldable into a constant). 812 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 813 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 814 // Emit calculation of the iterations count. 815 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 816 } 817 818 CGF.EmitOMPSimdInit(S); 819 820 emitAlignedClause(CGF, S); 821 CGF.EmitOMPLinearClauseInit(S); 822 bool HasLastprivateClause; 823 { 824 OMPPrivateScope LoopScope(CGF); 825 emitPrivateLoopCounters(CGF, LoopScope, S.counters(), 826 S.private_counters()); 827 emitPrivateLinearVars(CGF, S, LoopScope); 828 CGF.EmitOMPPrivateClause(S, LoopScope); 829 CGF.EmitOMPReductionClauseInit(S, LoopScope); 830 HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 831 (void)LoopScope.Privatize(); 832 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 833 S.getInc(), 834 [&S](CodeGenFunction &CGF) { 835 CGF.EmitOMPLoopBody(S, JumpDest()); 836 CGF.EmitStopPoint(&S); 837 }, 838 [](CodeGenFunction &) {}); 839 // Emit final copy of the lastprivate variables at the end of loops. 840 if (HasLastprivateClause) { 841 CGF.EmitOMPLastprivateClauseFinal(S); 842 } 843 CGF.EmitOMPReductionClauseFinal(S); 844 } 845 CGF.EmitOMPSimdFinal(S); 846 // Emit: if (PreCond) - end. 847 if (ContBlock) { 848 CGF.EmitBranch(ContBlock); 849 CGF.EmitBlock(ContBlock, true); 850 } 851 }; 852 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 853 } 854 855 void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, 856 const OMPLoopDirective &S, 857 OMPPrivateScope &LoopScope, 858 bool Ordered, llvm::Value *LB, 859 llvm::Value *UB, llvm::Value *ST, 860 llvm::Value *IL, llvm::Value *Chunk) { 861 auto &RT = CGM.getOpenMPRuntime(); 862 863 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 864 const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind); 865 866 assert((Ordered || 867 !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) && 868 "static non-chunked schedule does not need outer loop"); 869 870 // Emit outer loop. 871 // 872 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 873 // When schedule(dynamic,chunk_size) is specified, the iterations are 874 // distributed to threads in the team in chunks as the threads request them. 875 // Each thread executes a chunk of iterations, then requests another chunk, 876 // until no chunks remain to be distributed. Each chunk contains chunk_size 877 // iterations, except for the last chunk to be distributed, which may have 878 // fewer iterations. When no chunk_size is specified, it defaults to 1. 879 // 880 // When schedule(guided,chunk_size) is specified, the iterations are assigned 881 // to threads in the team in chunks as the executing threads request them. 882 // Each thread executes a chunk of iterations, then requests another chunk, 883 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 884 // each chunk is proportional to the number of unassigned iterations divided 885 // by the number of threads in the team, decreasing to 1. For a chunk_size 886 // with value k (greater than 1), the size of each chunk is determined in the 887 // same way, with the restriction that the chunks do not contain fewer than k 888 // iterations (except for the last chunk to be assigned, which may have fewer 889 // than k iterations). 890 // 891 // When schedule(auto) is specified, the decision regarding scheduling is 892 // delegated to the compiler and/or runtime system. The programmer gives the 893 // implementation the freedom to choose any possible mapping of iterations to 894 // threads in the team. 895 // 896 // When schedule(runtime) is specified, the decision regarding scheduling is 897 // deferred until run time, and the schedule and chunk size are taken from the 898 // run-sched-var ICV. If the ICV is set to auto, the schedule is 899 // implementation defined 900 // 901 // while(__kmpc_dispatch_next(&LB, &UB)) { 902 // idx = LB; 903 // while (idx <= UB) { BODY; ++idx; 904 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. 905 // } // inner loop 906 // } 907 // 908 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 909 // When schedule(static, chunk_size) is specified, iterations are divided into 910 // chunks of size chunk_size, and the chunks are assigned to the threads in 911 // the team in a round-robin fashion in the order of the thread number. 912 // 913 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { 914 // while (idx <= UB) { BODY; ++idx; } // inner loop 915 // LB = LB + ST; 916 // UB = UB + ST; 917 // } 918 // 919 920 const Expr *IVExpr = S.getIterationVariable(); 921 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 922 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 923 924 RT.emitForInit( 925 *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, Ordered, IL, LB, 926 (DynamicOrOrdered ? EmitAnyExpr(S.getLastIteration()).getScalarVal() 927 : UB), 928 ST, Chunk); 929 930 auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 931 932 // Start the loop with a block that tests the condition. 933 auto CondBlock = createBasicBlock("omp.dispatch.cond"); 934 EmitBlock(CondBlock); 935 LoopStack.push(CondBlock); 936 937 llvm::Value *BoolCondVal = nullptr; 938 if (!DynamicOrOrdered) { 939 // UB = min(UB, GlobalUB) 940 EmitIgnoredExpr(S.getEnsureUpperBound()); 941 // IV = LB 942 EmitIgnoredExpr(S.getInit()); 943 // IV < UB 944 BoolCondVal = EvaluateExprAsBool(S.getCond()); 945 } else { 946 BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, 947 IL, LB, UB, ST); 948 } 949 950 // If there are any cleanups between here and the loop-exit scope, 951 // create a block to stage a loop exit along. 952 auto ExitBlock = LoopExit.getBlock(); 953 if (LoopScope.requiresCleanups()) 954 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 955 956 auto LoopBody = createBasicBlock("omp.dispatch.body"); 957 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 958 if (ExitBlock != LoopExit.getBlock()) { 959 EmitBlock(ExitBlock); 960 EmitBranchThroughCleanup(LoopExit); 961 } 962 EmitBlock(LoopBody); 963 964 // Emit "IV = LB" (in case of static schedule, we have already calculated new 965 // LB for loop condition and emitted it above). 966 if (DynamicOrOrdered) 967 EmitIgnoredExpr(S.getInit()); 968 969 // Create a block for the increment. 970 auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 971 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 972 973 // Generate !llvm.loop.parallel metadata for loads and stores for loops 974 // with dynamic/guided scheduling and without ordered clause. 975 if (!isOpenMPSimdDirective(S.getDirectiveKind())) { 976 LoopStack.setParallel((ScheduleKind == OMPC_SCHEDULE_dynamic || 977 ScheduleKind == OMPC_SCHEDULE_guided) && 978 !Ordered); 979 } else { 980 EmitOMPSimdInit(S); 981 } 982 983 SourceLocation Loc = S.getLocStart(); 984 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), 985 [&S, LoopExit](CodeGenFunction &CGF) { 986 CGF.EmitOMPLoopBody(S, LoopExit); 987 CGF.EmitStopPoint(&S); 988 }, 989 [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) { 990 if (Ordered) { 991 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd( 992 CGF, Loc, IVSize, IVSigned); 993 } 994 }); 995 996 EmitBlock(Continue.getBlock()); 997 BreakContinueStack.pop_back(); 998 if (!DynamicOrOrdered) { 999 // Emit "LB = LB + Stride", "UB = UB + Stride". 1000 EmitIgnoredExpr(S.getNextLowerBound()); 1001 EmitIgnoredExpr(S.getNextUpperBound()); 1002 } 1003 1004 EmitBranch(CondBlock); 1005 LoopStack.pop(); 1006 // Emit the fall-through block. 1007 EmitBlock(LoopExit.getBlock()); 1008 1009 // Tell the runtime we are done. 1010 if (!DynamicOrOrdered) 1011 RT.emitForStaticFinish(*this, S.getLocEnd()); 1012 } 1013 1014 /// \brief Emit a helper variable and return corresponding lvalue. 1015 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 1016 const DeclRefExpr *Helper) { 1017 auto VDecl = cast<VarDecl>(Helper->getDecl()); 1018 CGF.EmitVarDecl(*VDecl); 1019 return CGF.EmitLValue(Helper); 1020 } 1021 1022 static std::pair<llvm::Value * /*Chunk*/, OpenMPScheduleClauseKind> 1023 emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S, 1024 bool OuterRegion) { 1025 // Detect the loop schedule kind and chunk. 1026 auto ScheduleKind = OMPC_SCHEDULE_unknown; 1027 llvm::Value *Chunk = nullptr; 1028 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { 1029 ScheduleKind = C->getScheduleKind(); 1030 if (const auto *Ch = C->getChunkSize()) { 1031 if (auto *ImpRef = cast_or_null<DeclRefExpr>(C->getHelperChunkSize())) { 1032 if (OuterRegion) { 1033 const VarDecl *ImpVar = cast<VarDecl>(ImpRef->getDecl()); 1034 CGF.EmitVarDecl(*ImpVar); 1035 CGF.EmitStoreThroughLValue( 1036 CGF.EmitAnyExpr(Ch), 1037 CGF.MakeNaturalAlignAddrLValue(CGF.GetAddrOfLocalVar(ImpVar), 1038 ImpVar->getType())); 1039 } else { 1040 Ch = ImpRef; 1041 } 1042 } 1043 if (!C->getHelperChunkSize() || !OuterRegion) { 1044 Chunk = CGF.EmitScalarExpr(Ch); 1045 Chunk = CGF.EmitScalarConversion(Chunk, Ch->getType(), 1046 S.getIterationVariable()->getType(), 1047 S.getLocStart()); 1048 } 1049 } 1050 } 1051 return std::make_pair(Chunk, ScheduleKind); 1052 } 1053 1054 bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { 1055 // Emit the loop iteration variable. 1056 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 1057 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 1058 EmitVarDecl(*IVDecl); 1059 1060 // Emit the iterations count variable. 1061 // If it is not a variable, Sema decided to calculate iterations count on each 1062 // iteration (e.g., it is foldable into a constant). 1063 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1064 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1065 // Emit calculation of the iterations count. 1066 EmitIgnoredExpr(S.getCalcLastIteration()); 1067 } 1068 1069 auto &RT = CGM.getOpenMPRuntime(); 1070 1071 bool HasLastprivateClause; 1072 // Check pre-condition. 1073 { 1074 // Skip the entire loop if we don't meet the precondition. 1075 // If the condition constant folds and can be elided, avoid emitting the 1076 // whole loop. 1077 bool CondConstant; 1078 llvm::BasicBlock *ContBlock = nullptr; 1079 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1080 if (!CondConstant) 1081 return false; 1082 } else { 1083 auto *ThenBlock = createBasicBlock("omp.precond.then"); 1084 ContBlock = createBasicBlock("omp.precond.end"); 1085 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 1086 getProfileCount(&S)); 1087 EmitBlock(ThenBlock); 1088 incrementProfileCounter(&S); 1089 } 1090 1091 emitAlignedClause(*this, S); 1092 EmitOMPLinearClauseInit(S); 1093 // Emit 'then' code. 1094 { 1095 // Emit helper vars inits. 1096 LValue LB = 1097 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); 1098 LValue UB = 1099 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); 1100 LValue ST = 1101 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 1102 LValue IL = 1103 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 1104 1105 OMPPrivateScope LoopScope(*this); 1106 if (EmitOMPFirstprivateClause(S, LoopScope)) { 1107 // Emit implicit barrier to synchronize threads and avoid data races on 1108 // initialization of firstprivate variables. 1109 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 1110 OMPD_unknown); 1111 } 1112 EmitOMPPrivateClause(S, LoopScope); 1113 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 1114 EmitOMPReductionClauseInit(S, LoopScope); 1115 emitPrivateLoopCounters(*this, LoopScope, S.counters(), 1116 S.private_counters()); 1117 emitPrivateLinearVars(*this, S, LoopScope); 1118 (void)LoopScope.Privatize(); 1119 1120 // Detect the loop schedule kind and chunk. 1121 llvm::Value *Chunk; 1122 OpenMPScheduleClauseKind ScheduleKind; 1123 auto ScheduleInfo = 1124 emitScheduleClause(*this, S, /*OuterRegion=*/false); 1125 Chunk = ScheduleInfo.first; 1126 ScheduleKind = ScheduleInfo.second; 1127 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1128 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1129 const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr; 1130 if (RT.isStaticNonchunked(ScheduleKind, 1131 /* Chunked */ Chunk != nullptr) && 1132 !Ordered) { 1133 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 1134 EmitOMPSimdInit(S); 1135 } 1136 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1137 // When no chunk_size is specified, the iteration space is divided into 1138 // chunks that are approximately equal in size, and at most one chunk is 1139 // distributed to each thread. Note that the size of the chunks is 1140 // unspecified in this case. 1141 RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, 1142 Ordered, IL.getAddress(), LB.getAddress(), 1143 UB.getAddress(), ST.getAddress()); 1144 auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 1145 // UB = min(UB, GlobalUB); 1146 EmitIgnoredExpr(S.getEnsureUpperBound()); 1147 // IV = LB; 1148 EmitIgnoredExpr(S.getInit()); 1149 // while (idx <= UB) { BODY; ++idx; } 1150 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1151 S.getInc(), 1152 [&S, LoopExit](CodeGenFunction &CGF) { 1153 CGF.EmitOMPLoopBody(S, LoopExit); 1154 CGF.EmitStopPoint(&S); 1155 }, 1156 [](CodeGenFunction &) {}); 1157 EmitBlock(LoopExit.getBlock()); 1158 // Tell the runtime we are done. 1159 RT.emitForStaticFinish(*this, S.getLocStart()); 1160 } else { 1161 // Emit the outer loop, which requests its work chunk [LB..UB] from 1162 // runtime and runs the inner loop to process it. 1163 EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, Ordered, 1164 LB.getAddress(), UB.getAddress(), ST.getAddress(), 1165 IL.getAddress(), Chunk); 1166 } 1167 EmitOMPReductionClauseFinal(S); 1168 // Emit final copy of the lastprivate variables if IsLastIter != 0. 1169 if (HasLastprivateClause) 1170 EmitOMPLastprivateClauseFinal( 1171 S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 1172 } 1173 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 1174 EmitOMPSimdFinal(S); 1175 } 1176 // We're now done with the loop, so jump to the continuation block. 1177 if (ContBlock) { 1178 EmitBranch(ContBlock); 1179 EmitBlock(ContBlock, true); 1180 } 1181 } 1182 return HasLastprivateClause; 1183 } 1184 1185 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 1186 LexicalScope Scope(*this, S.getSourceRange()); 1187 bool HasLastprivates = false; 1188 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { 1189 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 1190 }; 1191 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen); 1192 1193 // Emit an implicit barrier at the end. 1194 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 1195 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 1196 } 1197 } 1198 1199 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 1200 LexicalScope Scope(*this, S.getSourceRange()); 1201 bool HasLastprivates = false; 1202 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { 1203 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 1204 }; 1205 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1206 1207 // Emit an implicit barrier at the end. 1208 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 1209 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 1210 } 1211 } 1212 1213 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 1214 const Twine &Name, 1215 llvm::Value *Init = nullptr) { 1216 auto LVal = CGF.MakeNaturalAlignAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 1217 if (Init) 1218 CGF.EmitScalarInit(Init, LVal); 1219 return LVal; 1220 } 1221 1222 OpenMPDirectiveKind 1223 CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 1224 auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); 1225 auto *CS = dyn_cast<CompoundStmt>(Stmt); 1226 if (CS && CS->size() > 1) { 1227 bool HasLastprivates = false; 1228 auto &&CodeGen = [&S, CS, &HasLastprivates](CodeGenFunction &CGF) { 1229 auto &C = CGF.CGM.getContext(); 1230 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1231 // Emit helper vars inits. 1232 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 1233 CGF.Builder.getInt32(0)); 1234 auto *GlobalUBVal = CGF.Builder.getInt32(CS->size() - 1); 1235 LValue UB = 1236 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 1237 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 1238 CGF.Builder.getInt32(1)); 1239 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 1240 CGF.Builder.getInt32(0)); 1241 // Loop counter. 1242 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 1243 OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 1244 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 1245 OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 1246 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 1247 // Generate condition for loop. 1248 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 1249 OK_Ordinary, S.getLocStart(), 1250 /*fpContractable=*/false); 1251 // Increment for loop counter. 1252 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, 1253 OK_Ordinary, S.getLocStart()); 1254 auto BodyGen = [CS, &S, &IV](CodeGenFunction &CGF) { 1255 // Iterate through all sections and emit a switch construct: 1256 // switch (IV) { 1257 // case 0: 1258 // <SectionStmt[0]>; 1259 // break; 1260 // ... 1261 // case <NumSection> - 1: 1262 // <SectionStmt[<NumSection> - 1]>; 1263 // break; 1264 // } 1265 // .omp.sections.exit: 1266 auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 1267 auto *SwitchStmt = CGF.Builder.CreateSwitch( 1268 CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, 1269 CS->size()); 1270 unsigned CaseNumber = 0; 1271 for (auto *SubStmt : CS->children()) { 1272 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 1273 CGF.EmitBlock(CaseBB); 1274 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 1275 CGF.EmitStmt(SubStmt); 1276 CGF.EmitBranch(ExitBB); 1277 ++CaseNumber; 1278 } 1279 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 1280 }; 1281 1282 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 1283 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 1284 // Emit implicit barrier to synchronize threads and avoid data races on 1285 // initialization of firstprivate variables. 1286 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), 1287 OMPD_unknown); 1288 } 1289 CGF.EmitOMPPrivateClause(S, LoopScope); 1290 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1291 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1292 (void)LoopScope.Privatize(); 1293 1294 // Emit static non-chunked loop. 1295 CGF.CGM.getOpenMPRuntime().emitForInit( 1296 CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32, 1297 /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), 1298 LB.getAddress(), UB.getAddress(), ST.getAddress()); 1299 // UB = min(UB, GlobalUB); 1300 auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); 1301 auto *MinUBGlobalUB = CGF.Builder.CreateSelect( 1302 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 1303 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 1304 // IV = LB; 1305 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); 1306 // while (idx <= UB) { BODY; ++idx; } 1307 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, 1308 [](CodeGenFunction &) {}); 1309 // Tell the runtime we are done. 1310 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart()); 1311 CGF.EmitOMPReductionClauseFinal(S); 1312 1313 // Emit final copy of the lastprivate variables if IsLastIter != 0. 1314 if (HasLastprivates) 1315 CGF.EmitOMPLastprivateClauseFinal( 1316 S, CGF.Builder.CreateIsNotNull( 1317 CGF.EmitLoadOfScalar(IL, S.getLocStart()))); 1318 }; 1319 1320 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen); 1321 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 1322 // clause. Otherwise the barrier will be generated by the codegen for the 1323 // directive. 1324 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 1325 // Emit implicit barrier to synchronize threads and avoid data races on 1326 // initialization of firstprivate variables. 1327 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 1328 OMPD_unknown); 1329 } 1330 return OMPD_sections; 1331 } 1332 // If only one section is found - no need to generate loop, emit as a single 1333 // region. 1334 bool HasFirstprivates; 1335 // No need to generate reductions for sections with single section region, we 1336 // can use original shared variables for all operations. 1337 bool HasReductions = S.hasClausesOfKind<OMPReductionClause>(); 1338 // No need to generate lastprivates for sections with single section region, 1339 // we can use original shared variable for all calculations with barrier at 1340 // the end of the sections. 1341 bool HasLastprivates = S.hasClausesOfKind<OMPLastprivateClause>(); 1342 auto &&CodeGen = [Stmt, &S, &HasFirstprivates](CodeGenFunction &CGF) { 1343 CodeGenFunction::OMPPrivateScope SingleScope(CGF); 1344 HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope); 1345 CGF.EmitOMPPrivateClause(S, SingleScope); 1346 (void)SingleScope.Privatize(); 1347 1348 CGF.EmitStmt(Stmt); 1349 }; 1350 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 1351 llvm::None, llvm::None, llvm::None, 1352 llvm::None); 1353 // Emit barrier for firstprivates, lastprivates or reductions only if 1354 // 'sections' directive has 'nowait' clause. Otherwise the barrier will be 1355 // generated by the codegen for the directive. 1356 if ((HasFirstprivates || HasLastprivates || HasReductions) && 1357 S.getSingleClause<OMPNowaitClause>()) { 1358 // Emit implicit barrier to synchronize threads and avoid data races on 1359 // initialization of firstprivate variables. 1360 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_unknown); 1361 } 1362 return OMPD_single; 1363 } 1364 1365 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 1366 LexicalScope Scope(*this, S.getSourceRange()); 1367 OpenMPDirectiveKind EmittedAs = EmitSections(S); 1368 // Emit an implicit barrier at the end. 1369 if (!S.getSingleClause<OMPNowaitClause>()) { 1370 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs); 1371 } 1372 } 1373 1374 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 1375 LexicalScope Scope(*this, S.getSourceRange()); 1376 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1377 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1378 CGF.EnsureInsertPoint(); 1379 }; 1380 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen); 1381 } 1382 1383 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 1384 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 1385 llvm::SmallVector<const Expr *, 8> DestExprs; 1386 llvm::SmallVector<const Expr *, 8> SrcExprs; 1387 llvm::SmallVector<const Expr *, 8> AssignmentOps; 1388 // Check if there are any 'copyprivate' clauses associated with this 1389 // 'single' 1390 // construct. 1391 // Build a list of copyprivate variables along with helper expressions 1392 // (<source>, <destination>, <destination>=<source> expressions) 1393 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 1394 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 1395 DestExprs.append(C->destination_exprs().begin(), 1396 C->destination_exprs().end()); 1397 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 1398 AssignmentOps.append(C->assignment_ops().begin(), 1399 C->assignment_ops().end()); 1400 } 1401 LexicalScope Scope(*this, S.getSourceRange()); 1402 // Emit code for 'single' region along with 'copyprivate' clauses 1403 bool HasFirstprivates; 1404 auto &&CodeGen = [&S, &HasFirstprivates](CodeGenFunction &CGF) { 1405 CodeGenFunction::OMPPrivateScope SingleScope(CGF); 1406 HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope); 1407 CGF.EmitOMPPrivateClause(S, SingleScope); 1408 (void)SingleScope.Privatize(); 1409 1410 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1411 CGF.EnsureInsertPoint(); 1412 }; 1413 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 1414 CopyprivateVars, DestExprs, SrcExprs, 1415 AssignmentOps); 1416 // Emit an implicit barrier at the end (to avoid data race on firstprivate 1417 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 1418 if ((!S.getSingleClause<OMPNowaitClause>() || HasFirstprivates) && 1419 CopyprivateVars.empty()) { 1420 CGM.getOpenMPRuntime().emitBarrierCall( 1421 *this, S.getLocStart(), 1422 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); 1423 } 1424 } 1425 1426 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 1427 LexicalScope Scope(*this, S.getSourceRange()); 1428 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1429 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1430 CGF.EnsureInsertPoint(); 1431 }; 1432 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 1433 } 1434 1435 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 1436 LexicalScope Scope(*this, S.getSourceRange()); 1437 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1438 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1439 CGF.EnsureInsertPoint(); 1440 }; 1441 CGM.getOpenMPRuntime().emitCriticalRegion( 1442 *this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart()); 1443 } 1444 1445 void CodeGenFunction::EmitOMPParallelForDirective( 1446 const OMPParallelForDirective &S) { 1447 // Emit directive as a combined directive that consists of two implicit 1448 // directives: 'parallel' with 'for' directive. 1449 LexicalScope Scope(*this, S.getSourceRange()); 1450 (void)emitScheduleClause(*this, S, /*OuterRegion=*/true); 1451 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1452 CGF.EmitOMPWorksharingLoop(S); 1453 // Emit implicit barrier at the end of parallel region, but this barrier 1454 // is at the end of 'for' directive, so emit it as the implicit barrier for 1455 // this 'for' directive. 1456 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), 1457 OMPD_parallel); 1458 }; 1459 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); 1460 } 1461 1462 void CodeGenFunction::EmitOMPParallelForSimdDirective( 1463 const OMPParallelForSimdDirective &S) { 1464 // Emit directive as a combined directive that consists of two implicit 1465 // directives: 'parallel' with 'for' directive. 1466 LexicalScope Scope(*this, S.getSourceRange()); 1467 (void)emitScheduleClause(*this, S, /*OuterRegion=*/true); 1468 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1469 CGF.EmitOMPWorksharingLoop(S); 1470 // Emit implicit barrier at the end of parallel region, but this barrier 1471 // is at the end of 'for' directive, so emit it as the implicit barrier for 1472 // this 'for' directive. 1473 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), 1474 OMPD_parallel); 1475 }; 1476 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); 1477 } 1478 1479 void CodeGenFunction::EmitOMPParallelSectionsDirective( 1480 const OMPParallelSectionsDirective &S) { 1481 // Emit directive as a combined directive that consists of two implicit 1482 // directives: 'parallel' with 'sections' directive. 1483 LexicalScope Scope(*this, S.getSourceRange()); 1484 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1485 (void)CGF.EmitSections(S); 1486 // Emit implicit barrier at the end of parallel region. 1487 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), 1488 OMPD_parallel); 1489 }; 1490 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); 1491 } 1492 1493 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 1494 // Emit outlined function for task construct. 1495 LexicalScope Scope(*this, S.getSourceRange()); 1496 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 1497 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 1498 auto *I = CS->getCapturedDecl()->param_begin(); 1499 auto *PartId = std::next(I); 1500 // The first function argument for tasks is a thread id, the second one is a 1501 // part id (0 for tied tasks, >=0 for untied task). 1502 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 1503 // Get list of private variables. 1504 llvm::SmallVector<const Expr *, 8> PrivateVars; 1505 llvm::SmallVector<const Expr *, 8> PrivateCopies; 1506 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 1507 auto IRef = C->varlist_begin(); 1508 for (auto *IInit : C->private_copies()) { 1509 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 1510 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 1511 PrivateVars.push_back(*IRef); 1512 PrivateCopies.push_back(IInit); 1513 } 1514 ++IRef; 1515 } 1516 } 1517 EmittedAsPrivate.clear(); 1518 // Get list of firstprivate variables. 1519 llvm::SmallVector<const Expr *, 8> FirstprivateVars; 1520 llvm::SmallVector<const Expr *, 8> FirstprivateCopies; 1521 llvm::SmallVector<const Expr *, 8> FirstprivateInits; 1522 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 1523 auto IRef = C->varlist_begin(); 1524 auto IElemInitRef = C->inits().begin(); 1525 for (auto *IInit : C->private_copies()) { 1526 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 1527 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 1528 FirstprivateVars.push_back(*IRef); 1529 FirstprivateCopies.push_back(IInit); 1530 FirstprivateInits.push_back(*IElemInitRef); 1531 } 1532 ++IRef, ++IElemInitRef; 1533 } 1534 } 1535 // Build list of dependences. 1536 llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8> 1537 Dependences; 1538 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { 1539 for (auto *IRef : C->varlists()) { 1540 Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); 1541 } 1542 } 1543 auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars]( 1544 CodeGenFunction &CGF) { 1545 // Set proper addresses for generated private copies. 1546 auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); 1547 OMPPrivateScope Scope(CGF); 1548 if (!PrivateVars.empty() || !FirstprivateVars.empty()) { 1549 auto *CopyFn = CGF.Builder.CreateAlignedLoad( 1550 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)), 1551 CGF.PointerAlignInBytes); 1552 auto *PrivatesPtr = CGF.Builder.CreateAlignedLoad( 1553 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)), 1554 CGF.PointerAlignInBytes); 1555 // Map privates. 1556 llvm::SmallVector<std::pair<const VarDecl *, llvm::Value *>, 16> 1557 PrivatePtrs; 1558 llvm::SmallVector<llvm::Value *, 16> CallArgs; 1559 CallArgs.push_back(PrivatesPtr); 1560 for (auto *E : PrivateVars) { 1561 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1562 auto *PrivatePtr = 1563 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); 1564 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 1565 CallArgs.push_back(PrivatePtr); 1566 } 1567 for (auto *E : FirstprivateVars) { 1568 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1569 auto *PrivatePtr = 1570 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); 1571 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 1572 CallArgs.push_back(PrivatePtr); 1573 } 1574 CGF.EmitRuntimeCall(CopyFn, CallArgs); 1575 for (auto &&Pair : PrivatePtrs) { 1576 auto *Replacement = 1577 CGF.Builder.CreateAlignedLoad(Pair.second, CGF.PointerAlignInBytes); 1578 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 1579 } 1580 } 1581 (void)Scope.Privatize(); 1582 if (*PartId) { 1583 // TODO: emit code for untied tasks. 1584 } 1585 CGF.EmitStmt(CS->getCapturedStmt()); 1586 }; 1587 auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 1588 S, *I, OMPD_task, CodeGen); 1589 // Check if we should emit tied or untied task. 1590 bool Tied = !S.getSingleClause<OMPUntiedClause>(); 1591 // Check if the task is final 1592 llvm::PointerIntPair<llvm::Value *, 1, bool> Final; 1593 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 1594 // If the condition constant folds and can be elided, try to avoid emitting 1595 // the condition and the dead arm of the if/else. 1596 auto *Cond = Clause->getCondition(); 1597 bool CondConstant; 1598 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 1599 Final.setInt(CondConstant); 1600 else 1601 Final.setPointer(EvaluateExprAsBool(Cond)); 1602 } else { 1603 // By default the task is not final. 1604 Final.setInt(/*IntVal=*/false); 1605 } 1606 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 1607 const Expr *IfCond = nullptr; 1608 if (const auto *C = S.getSingleClause<OMPIfClause>()) { 1609 IfCond = C->getCondition(); 1610 } 1611 CGM.getOpenMPRuntime().emitTaskCall( 1612 *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy, 1613 CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars, 1614 FirstprivateCopies, FirstprivateInits, Dependences); 1615 } 1616 1617 void CodeGenFunction::EmitOMPTaskyieldDirective( 1618 const OMPTaskyieldDirective &S) { 1619 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); 1620 } 1621 1622 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 1623 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); 1624 } 1625 1626 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 1627 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart()); 1628 } 1629 1630 void CodeGenFunction::EmitOMPTaskgroupDirective( 1631 const OMPTaskgroupDirective &S) { 1632 LexicalScope Scope(*this, S.getSourceRange()); 1633 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1634 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1635 CGF.EnsureInsertPoint(); 1636 }; 1637 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); 1638 } 1639 1640 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 1641 CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { 1642 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) { 1643 return llvm::makeArrayRef(FlushClause->varlist_begin(), 1644 FlushClause->varlist_end()); 1645 } 1646 return llvm::None; 1647 }(), S.getLocStart()); 1648 } 1649 1650 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 1651 LexicalScope Scope(*this, S.getSourceRange()); 1652 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1653 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1654 CGF.EnsureInsertPoint(); 1655 }; 1656 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart()); 1657 } 1658 1659 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 1660 QualType SrcType, QualType DestType, 1661 SourceLocation Loc) { 1662 assert(CGF.hasScalarEvaluationKind(DestType) && 1663 "DestType must have scalar evaluation kind."); 1664 assert(!Val.isAggregate() && "Must be a scalar or complex."); 1665 return Val.isScalar() 1666 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, 1667 Loc) 1668 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 1669 DestType, Loc); 1670 } 1671 1672 static CodeGenFunction::ComplexPairTy 1673 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 1674 QualType DestType, SourceLocation Loc) { 1675 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 1676 "DestType must have complex evaluation kind."); 1677 CodeGenFunction::ComplexPairTy ComplexVal; 1678 if (Val.isScalar()) { 1679 // Convert the input element to the element type of the complex. 1680 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 1681 auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 1682 DestElementType, Loc); 1683 ComplexVal = CodeGenFunction::ComplexPairTy( 1684 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 1685 } else { 1686 assert(Val.isComplex() && "Must be a scalar or complex."); 1687 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 1688 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 1689 ComplexVal.first = CGF.EmitScalarConversion( 1690 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 1691 ComplexVal.second = CGF.EmitScalarConversion( 1692 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 1693 } 1694 return ComplexVal; 1695 } 1696 1697 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 1698 LValue LVal, RValue RVal) { 1699 if (LVal.isGlobalReg()) { 1700 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 1701 } else { 1702 CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? llvm::SequentiallyConsistent 1703 : llvm::Monotonic, 1704 LVal.isVolatile(), /*IsInit=*/false); 1705 } 1706 } 1707 1708 static void emitSimpleStore(CodeGenFunction &CGF, LValue LVal, RValue RVal, 1709 QualType RValTy, SourceLocation Loc) { 1710 switch (CGF.getEvaluationKind(LVal.getType())) { 1711 case TEK_Scalar: 1712 CGF.EmitStoreThroughLValue(RValue::get(convertToScalarValue( 1713 CGF, RVal, RValTy, LVal.getType(), Loc)), 1714 LVal); 1715 break; 1716 case TEK_Complex: 1717 CGF.EmitStoreOfComplex( 1718 convertToComplexValue(CGF, RVal, RValTy, LVal.getType(), Loc), LVal, 1719 /*isInit=*/false); 1720 break; 1721 case TEK_Aggregate: 1722 llvm_unreachable("Must be a scalar or complex."); 1723 } 1724 } 1725 1726 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 1727 const Expr *X, const Expr *V, 1728 SourceLocation Loc) { 1729 // v = x; 1730 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 1731 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 1732 LValue XLValue = CGF.EmitLValue(X); 1733 LValue VLValue = CGF.EmitLValue(V); 1734 RValue Res = XLValue.isGlobalReg() 1735 ? CGF.EmitLoadOfLValue(XLValue, Loc) 1736 : CGF.EmitAtomicLoad(XLValue, Loc, 1737 IsSeqCst ? llvm::SequentiallyConsistent 1738 : llvm::Monotonic, 1739 XLValue.isVolatile()); 1740 // OpenMP, 2.12.6, atomic Construct 1741 // Any atomic construct with a seq_cst clause forces the atomically 1742 // performed operation to include an implicit flush operation without a 1743 // list. 1744 if (IsSeqCst) 1745 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 1746 emitSimpleStore(CGF, VLValue, Res, X->getType().getNonReferenceType(), Loc); 1747 } 1748 1749 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 1750 const Expr *X, const Expr *E, 1751 SourceLocation Loc) { 1752 // x = expr; 1753 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 1754 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); 1755 // OpenMP, 2.12.6, atomic Construct 1756 // Any atomic construct with a seq_cst clause forces the atomically 1757 // performed operation to include an implicit flush operation without a 1758 // list. 1759 if (IsSeqCst) 1760 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 1761 } 1762 1763 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, 1764 RValue Update, 1765 BinaryOperatorKind BO, 1766 llvm::AtomicOrdering AO, 1767 bool IsXLHSInRHSPart) { 1768 auto &Context = CGF.CGM.getContext(); 1769 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' 1770 // expression is simple and atomic is allowed for the given type for the 1771 // target platform. 1772 if (BO == BO_Comma || !Update.isScalar() || 1773 !Update.getScalarVal()->getType()->isIntegerTy() || 1774 !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) && 1775 (Update.getScalarVal()->getType() != 1776 X.getAddress()->getType()->getPointerElementType())) || 1777 !X.getAddress()->getType()->getPointerElementType()->isIntegerTy() || 1778 !Context.getTargetInfo().hasBuiltinAtomic( 1779 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) 1780 return std::make_pair(false, RValue::get(nullptr)); 1781 1782 llvm::AtomicRMWInst::BinOp RMWOp; 1783 switch (BO) { 1784 case BO_Add: 1785 RMWOp = llvm::AtomicRMWInst::Add; 1786 break; 1787 case BO_Sub: 1788 if (!IsXLHSInRHSPart) 1789 return std::make_pair(false, RValue::get(nullptr)); 1790 RMWOp = llvm::AtomicRMWInst::Sub; 1791 break; 1792 case BO_And: 1793 RMWOp = llvm::AtomicRMWInst::And; 1794 break; 1795 case BO_Or: 1796 RMWOp = llvm::AtomicRMWInst::Or; 1797 break; 1798 case BO_Xor: 1799 RMWOp = llvm::AtomicRMWInst::Xor; 1800 break; 1801 case BO_LT: 1802 RMWOp = X.getType()->hasSignedIntegerRepresentation() 1803 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min 1804 : llvm::AtomicRMWInst::Max) 1805 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin 1806 : llvm::AtomicRMWInst::UMax); 1807 break; 1808 case BO_GT: 1809 RMWOp = X.getType()->hasSignedIntegerRepresentation() 1810 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max 1811 : llvm::AtomicRMWInst::Min) 1812 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax 1813 : llvm::AtomicRMWInst::UMin); 1814 break; 1815 case BO_Assign: 1816 RMWOp = llvm::AtomicRMWInst::Xchg; 1817 break; 1818 case BO_Mul: 1819 case BO_Div: 1820 case BO_Rem: 1821 case BO_Shl: 1822 case BO_Shr: 1823 case BO_LAnd: 1824 case BO_LOr: 1825 return std::make_pair(false, RValue::get(nullptr)); 1826 case BO_PtrMemD: 1827 case BO_PtrMemI: 1828 case BO_LE: 1829 case BO_GE: 1830 case BO_EQ: 1831 case BO_NE: 1832 case BO_AddAssign: 1833 case BO_SubAssign: 1834 case BO_AndAssign: 1835 case BO_OrAssign: 1836 case BO_XorAssign: 1837 case BO_MulAssign: 1838 case BO_DivAssign: 1839 case BO_RemAssign: 1840 case BO_ShlAssign: 1841 case BO_ShrAssign: 1842 case BO_Comma: 1843 llvm_unreachable("Unsupported atomic update operation"); 1844 } 1845 auto *UpdateVal = Update.getScalarVal(); 1846 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) { 1847 UpdateVal = CGF.Builder.CreateIntCast( 1848 IC, X.getAddress()->getType()->getPointerElementType(), 1849 X.getType()->hasSignedIntegerRepresentation()); 1850 } 1851 auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO); 1852 return std::make_pair(true, RValue::get(Res)); 1853 } 1854 1855 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( 1856 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, 1857 llvm::AtomicOrdering AO, SourceLocation Loc, 1858 const llvm::function_ref<RValue(RValue)> &CommonGen) { 1859 // Update expressions are allowed to have the following forms: 1860 // x binop= expr; -> xrval + expr; 1861 // x++, ++x -> xrval + 1; 1862 // x--, --x -> xrval - 1; 1863 // x = x binop expr; -> xrval binop expr 1864 // x = expr Op x; - > expr binop xrval; 1865 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart); 1866 if (!Res.first) { 1867 if (X.isGlobalReg()) { 1868 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop 1869 // 'xrval'. 1870 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X); 1871 } else { 1872 // Perform compare-and-swap procedure. 1873 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified()); 1874 } 1875 } 1876 return Res; 1877 } 1878 1879 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, 1880 const Expr *X, const Expr *E, 1881 const Expr *UE, bool IsXLHSInRHSPart, 1882 SourceLocation Loc) { 1883 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 1884 "Update expr in 'atomic update' must be a binary operator."); 1885 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 1886 // Update expressions are allowed to have the following forms: 1887 // x binop= expr; -> xrval + expr; 1888 // x++, ++x -> xrval + 1; 1889 // x--, --x -> xrval - 1; 1890 // x = x binop expr; -> xrval binop expr 1891 // x = expr Op x; - > expr binop xrval; 1892 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); 1893 LValue XLValue = CGF.EmitLValue(X); 1894 RValue ExprRValue = CGF.EmitAnyExpr(E); 1895 auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic; 1896 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 1897 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 1898 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 1899 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 1900 auto Gen = 1901 [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue { 1902 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 1903 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 1904 return CGF.EmitAnyExpr(UE); 1905 }; 1906 (void)CGF.EmitOMPAtomicSimpleUpdateExpr( 1907 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 1908 // OpenMP, 2.12.6, atomic Construct 1909 // Any atomic construct with a seq_cst clause forces the atomically 1910 // performed operation to include an implicit flush operation without a 1911 // list. 1912 if (IsSeqCst) 1913 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 1914 } 1915 1916 static RValue convertToType(CodeGenFunction &CGF, RValue Value, 1917 QualType SourceType, QualType ResType, 1918 SourceLocation Loc) { 1919 switch (CGF.getEvaluationKind(ResType)) { 1920 case TEK_Scalar: 1921 return RValue::get( 1922 convertToScalarValue(CGF, Value, SourceType, ResType, Loc)); 1923 case TEK_Complex: { 1924 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc); 1925 return RValue::getComplex(Res.first, Res.second); 1926 } 1927 case TEK_Aggregate: 1928 break; 1929 } 1930 llvm_unreachable("Must be a scalar or complex."); 1931 } 1932 1933 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, 1934 bool IsPostfixUpdate, const Expr *V, 1935 const Expr *X, const Expr *E, 1936 const Expr *UE, bool IsXLHSInRHSPart, 1937 SourceLocation Loc) { 1938 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue"); 1939 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue"); 1940 RValue NewVVal; 1941 LValue VLValue = CGF.EmitLValue(V); 1942 LValue XLValue = CGF.EmitLValue(X); 1943 RValue ExprRValue = CGF.EmitAnyExpr(E); 1944 auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic; 1945 QualType NewVValType; 1946 if (UE) { 1947 // 'x' is updated with some additional value. 1948 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 1949 "Update expr in 'atomic capture' must be a binary operator."); 1950 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 1951 // Update expressions are allowed to have the following forms: 1952 // x binop= expr; -> xrval + expr; 1953 // x++, ++x -> xrval + 1; 1954 // x--, --x -> xrval - 1; 1955 // x = x binop expr; -> xrval binop expr 1956 // x = expr Op x; - > expr binop xrval; 1957 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 1958 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 1959 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 1960 NewVValType = XRValExpr->getType(); 1961 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 1962 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, 1963 IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue { 1964 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 1965 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 1966 RValue Res = CGF.EmitAnyExpr(UE); 1967 NewVVal = IsPostfixUpdate ? XRValue : Res; 1968 return Res; 1969 }; 1970 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 1971 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 1972 if (Res.first) { 1973 // 'atomicrmw' instruction was generated. 1974 if (IsPostfixUpdate) { 1975 // Use old value from 'atomicrmw'. 1976 NewVVal = Res.second; 1977 } else { 1978 // 'atomicrmw' does not provide new value, so evaluate it using old 1979 // value of 'x'. 1980 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 1981 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 1982 NewVVal = CGF.EmitAnyExpr(UE); 1983 } 1984 } 1985 } else { 1986 // 'x' is simply rewritten with some 'expr'. 1987 NewVValType = X->getType().getNonReferenceType(); 1988 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 1989 X->getType().getNonReferenceType(), Loc); 1990 auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue { 1991 NewVVal = XRValue; 1992 return ExprRValue; 1993 }; 1994 // Try to perform atomicrmw xchg, otherwise simple exchange. 1995 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 1996 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 1997 Loc, Gen); 1998 if (Res.first) { 1999 // 'atomicrmw' instruction was generated. 2000 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 2001 } 2002 } 2003 // Emit post-update store to 'v' of old/new 'x' value. 2004 emitSimpleStore(CGF, VLValue, NewVVal, NewVValType, Loc); 2005 // OpenMP, 2.12.6, atomic Construct 2006 // Any atomic construct with a seq_cst clause forces the atomically 2007 // performed operation to include an implicit flush operation without a 2008 // list. 2009 if (IsSeqCst) 2010 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2011 } 2012 2013 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 2014 bool IsSeqCst, bool IsPostfixUpdate, 2015 const Expr *X, const Expr *V, const Expr *E, 2016 const Expr *UE, bool IsXLHSInRHSPart, 2017 SourceLocation Loc) { 2018 switch (Kind) { 2019 case OMPC_read: 2020 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 2021 break; 2022 case OMPC_write: 2023 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 2024 break; 2025 case OMPC_unknown: 2026 case OMPC_update: 2027 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 2028 break; 2029 case OMPC_capture: 2030 EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 2031 IsXLHSInRHSPart, Loc); 2032 break; 2033 case OMPC_if: 2034 case OMPC_final: 2035 case OMPC_num_threads: 2036 case OMPC_private: 2037 case OMPC_firstprivate: 2038 case OMPC_lastprivate: 2039 case OMPC_reduction: 2040 case OMPC_safelen: 2041 case OMPC_simdlen: 2042 case OMPC_collapse: 2043 case OMPC_default: 2044 case OMPC_seq_cst: 2045 case OMPC_shared: 2046 case OMPC_linear: 2047 case OMPC_aligned: 2048 case OMPC_copyin: 2049 case OMPC_copyprivate: 2050 case OMPC_flush: 2051 case OMPC_proc_bind: 2052 case OMPC_schedule: 2053 case OMPC_ordered: 2054 case OMPC_nowait: 2055 case OMPC_untied: 2056 case OMPC_threadprivate: 2057 case OMPC_depend: 2058 case OMPC_mergeable: 2059 case OMPC_device: 2060 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 2061 } 2062 } 2063 2064 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 2065 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); 2066 OpenMPClauseKind Kind = OMPC_unknown; 2067 for (auto *C : S.clauses()) { 2068 // Find first clause (skip seq_cst clause, if it is first). 2069 if (C->getClauseKind() != OMPC_seq_cst) { 2070 Kind = C->getClauseKind(); 2071 break; 2072 } 2073 } 2074 2075 const auto *CS = 2076 S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 2077 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) { 2078 enterFullExpression(EWC); 2079 } 2080 // Processing for statements under 'atomic capture'. 2081 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 2082 for (const auto *C : Compound->body()) { 2083 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) { 2084 enterFullExpression(EWC); 2085 } 2086 } 2087 } 2088 2089 LexicalScope Scope(*this, S.getSourceRange()); 2090 auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) { 2091 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), 2092 S.getV(), S.getExpr(), S.getUpdateExpr(), 2093 S.isXLHSInRHSPart(), S.getLocStart()); 2094 }; 2095 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); 2096 } 2097 2098 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) { 2099 llvm_unreachable("CodeGen for 'omp target' is not supported yet."); 2100 } 2101 2102 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) { 2103 llvm_unreachable("CodeGen for 'omp teams' is not supported yet."); 2104 } 2105 2106 void CodeGenFunction::EmitOMPCancellationPointDirective( 2107 const OMPCancellationPointDirective &S) { 2108 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), 2109 S.getCancelRegion()); 2110 } 2111 2112 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 2113 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), 2114 S.getCancelRegion()); 2115 } 2116 2117 CodeGenFunction::JumpDest 2118 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 2119 if (Kind == OMPD_parallel || Kind == OMPD_task) 2120 return ReturnBlock; 2121 else if (Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections) 2122 return BreakContinueStack.empty() ? JumpDest() 2123 : BreakContinueStack.back().BreakBlock; 2124 return JumpDest(); 2125 } 2126 2127 // Generate the instructions for '#pragma omp target data' directive. 2128 void CodeGenFunction::EmitOMPTargetDataDirective( 2129 const OMPTargetDataDirective &S) { 2130 2131 // emit the code inside the construct for now 2132 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2133 CGM.getOpenMPRuntime().emitInlinedDirective( 2134 *this, OMPD_target_data, 2135 [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); 2136 } 2137