//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
using namespace clang;
using namespace CodeGen;

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    llvm::Value *DestAddr, llvm::Value *SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(llvm::Value *, llvm::Value *)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  auto SrcBegin = SrcAddr;
  auto DestBegin = DestAddr;
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
  // Cast from pointer to array type to pointer to single element.
  SrcBegin = Builder.CreatePointerBitCastOrAddrSpaceCast(SrcBegin,
                                                         DestBegin->getType());
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);
  auto SrcElementCurrent =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementCurrent->addIncoming(SrcBegin, EntryBB);
  auto DestElementCurrent = Builder.CreatePHI(DestBegin->getType(), 2,
                                              "omp.arraycpy.destElementPast");
  DestElementCurrent->addIncoming(DestBegin, EntryBB);

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementCurrent, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementCurrent, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // CopyGen may have emitted new blocks, so take the back-edge predecessors
  // from the current insertion point rather than from BodyBB.
  DestElementCurrent->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementCurrent->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(CodeGenFunction &CGF,
                                  QualType OriginalType, llvm::Value *DestAddr,
                                  llvm::Value *SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
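      // (A trivial element-wise assignment has no side effects, so a single
      // bitwise aggregate copy over the whole array is equivalent.)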
      CGF.EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      CGF.EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [&CGF, Copy, SrcVD, DestVD](llvm::Value *DestElement,
                                      llvm::Value *SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(CGF);
            Remap.addPrivate(DestVD, [DestElement]() -> llvm::Value *{
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> llvm::Value *{ return SrcElement; });
            (void)Remap.Privatize();
            CGF.EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(CGF);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> llvm::Value *{ return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> llvm::Value *{ return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    CGF.EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  for (auto &&I = D.getClausesOfKind(OMPC_firstprivate); I; ++I) {
    auto *C = cast<OMPFirstprivateClause>(*I);
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsFirstprivate.count(OrigVD) == 0) {
        EmittedAsFirstprivate.insert(OrigVD);
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        auto *OriginalAddr = EmitLValue(&DRE).getAddress();
        if (OrigVD->getType()->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  (*IRef)->getType());
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr,
                  (*IRef)->getType(),
                  [this, VDInit, Init](llvm::Value *DestElement,
                                       llvm::Value *SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    LocalDeclMap[VDInit] = SrcElement;
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
            // Emit private VarDecl with copy init.
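            // (VDInit is the pseudo-variable Sema synthesized to stand for
            // the original value inside the copy-initializer.)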
            // Remap temp VDInit variable to the address of the original
            // variable (for proper handling of captured global variables).
            LocalDeclMap[VDInit] = OriginalAddr;
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef, ++InitsRef;
    }
  }
  return !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (auto &&I = D.getClausesOfKind(OMPC_private); I; ++I) {
    auto *C = cast<OMPPrivateClause>(*I);
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (auto &&I = D.getClausesOfKind(OMPC_copyin); I; ++I) {
    auto *C = cast<OMPCopyinClause>(*I);
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable.
        auto *MasterAddr = VD->isStaticLocal()
                               ? CGM.getStaticLocalDeclAddress(VD)
                               : CGM.GetAddrOfGlobal(VD);
        // Get the address of the threadprivate variable.
        auto *PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If it
          // is, there is no need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr, CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr, CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(*this, (*IRef)->getType(), PrivateAddr, MasterAddr, DestVD,
                    SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
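    // (The master thread branched straight here, so it skips the copies
    // emitted above.)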
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (auto &&I = D.getClausesOfKind(OMPC_lastprivate); I; ++I) {
    auto *C = cast<OMPLastprivateClause>(*I);
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> llvm::Value *{
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for the 'firstprivate' clause.
        if (!IInit)
          continue;
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "lastprivate var already registered as private");
        HasAtLeastOneLastprivate = HasAtLeastOneLastprivate || IsRegistered;
      }
      ++IRef, ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  auto *ThenBB = createBasicBlock(".omp.lastprivate.then");
  auto *DoneBB = createBasicBlock(".omp.lastprivate.done");
  Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
  EmitBlock(ThenBB);
  {
    llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
    for (auto &&I = D.getClausesOfKind(OMPC_lastprivate); I; ++I) {
      auto *C = cast<OMPLastprivateClause>(*I);
      auto IRef = C->varlist_begin();
      auto ISrcRef = C->source_exprs().begin();
      auto IDestRef = C->destination_exprs().begin();
      for (auto *AssignOp : C->assignment_ops()) {
        auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
        if (AlreadyEmittedVars.insert(PrivateVD->getCanonicalDecl()).second) {
          auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
          auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
          // Get the address of the original variable.
          auto *OriginalAddr = GetAddrOfLocalVar(DestVD);
          // Get the address of the private variable.
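          // (The private copy was emitted in EmitOMPLastprivateClauseInit and
          // is therefore available via the local declaration map.)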
          auto *PrivateAddr = GetAddrOfLocalVar(PrivateVD);
          EmitOMPCopy(*this, (*IRef)->getType(), OriginalAddr, PrivateAddr,
                      DestVD, SrcVD, AssignOp);
        }
        ++IRef;
        ++ISrcRef;
        ++IDestRef;
      }
    }
  }
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  for (auto &&I = D.getClausesOfKind(OMPC_reduction); I; ++I) {
    auto *C = cast<OMPReductionClause>(*I);
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (auto IRef : C->varlists()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> llvm::Value *{
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        IRef->getType(), VK_LValue, IRef->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
      // Emit reduction copy.
      bool IsRegistered =
          PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> llvm::Value *{
            // Emit private VarDecl with reduction init.
            EmitDecl(*PrivateVD);
            return GetAddrOfLocalVar(PrivateVD);
          });
      assert(IsRegistered && "private var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++ILHS, ++IRHS;
    }
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D) {
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (auto &&I = D.getClausesOfKind(OMPC_reduction); I; ++I) {
    HasAtLeastOneReduction = true;
    auto *C = cast<OMPReductionClause>(*I);
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has implicit barrier).
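    // The runtime entry combines the private copies into the original
    // variables using the <LHS> = <LHS> op <RHS> expressions collected above.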
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), LHSExprs, RHSExprs, ReductionOps,
        D.getSingleClause(OMPC_nowait) ||
            isOpenMPParallelDirective(D.getDirectiveKind()));
  }
}

static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                           const OMPExecutableDirective &S,
                                           const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), CodeGen);
  if (auto C = S.getSingleClause(OMPC_num_threads)) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  if (auto C = S.getSingleClause(OMPC_if)) {
    IfCond = cast<OMPIfClause>(C)->getCondition();
  }
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedStruct, IfCond);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins || Firstprivates) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables, or on propagation of the
      // master thread's values of threadprivate variables to the local
      // instances of those variables in all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                                 OMPD_unknown);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S);
    // Emit implicit barrier at the end of the 'parallel' directive.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                               OMPD_unknown);
  };
  emitCommonOMPParallelDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
                                      bool SeparateIter) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values for the current iteration.
  for (auto I : S.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (auto &&I = S.getClausesOfKind(OMPC_linear); I; ++I) {
    auto *C = cast<OMPLinearClause>(*I);
    for (auto U : C->updates()) {
      EmitIgnoredExpr(U);
    }
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue));
  // Emit loop body.
  EmitStmt(S.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (SeparateIter) {
    // TODO: Update lastprivates if the SeparateIter flag is true.
    // This will be implemented in a follow-up OMPLastprivateClause patch, but
    // the result should still be correct without it, as we do not make these
    // variables private yet.
  }
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) {
  auto IC = S.counters().begin();
  for (auto F : S.finals()) {
    if (LocalDeclMap.lookup(cast<DeclRefExpr>((*IC))->getDecl())) {
      EmitIgnoredExpr(F);
    }
    ++IC;
  }
  // Emit the final values of the linear variables.
  for (auto &&I = S.getClausesOfKind(OMPC_linear); I; ++I) {
    auto *C = cast<OMPLinearClause>(*I);
    for (auto F : C->finals()) {
      EmitIgnoredExpr(F);
    }
  }
}

static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM,
                                 const OMPAlignedClause &Clause) {
  unsigned ClauseAlignment = 0;
  if (auto AlignmentExpr = Clause.getAlignment()) {
    auto AlignmentCI =
        cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
    ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
  }
  for (auto E : Clause.varlists()) {
    unsigned Alignment = ClauseAlignment;
    if (Alignment == 0) {
      // OpenMP [2.8.1, Description]
      // If no optional parameter is specified, implementation-defined default
      // alignments for SIMD instructions on the target platforms are assumed.
      Alignment = CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
          E->getType());
    }
    assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
           "alignment is not power of 2");
    if (Alignment != 0) {
      llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
      CGF.EmitAlignmentAssumption(PtrValue, Alignment);
    }
  }
}

static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters) {
  for (auto *E : Counters) {
    auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    (void)LoopScope.addPrivate(VD, [&]() -> llvm::Value *{
      // Emit var without initialization.
      auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
      CGF.EmitAutoVarCleanups(VarEmission);
      return VarEmission.getAllocatedAddress();
    });
  }
}

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
  EmitPrivateLoopCounters(CGF, PreCondScope, S.counters());
  const VarDecl *IVDecl =
      cast<VarDecl>(cast<DeclRefExpr>(S.getIterationVariable())->getDecl());
  bool IsRegistered = PreCondScope.addPrivate(IVDecl, [&]() -> llvm::Value *{
    // Emit var without initialization.
    auto VarEmission = CGF.EmitAutoVarAlloca(*IVDecl);
    CGF.EmitAutoVarCleanups(VarEmission);
    return VarEmission.getAllocatedAddress();
  });
  assert(IsRegistered && "counter already registered as private");
  // Silence the warning about unused variable.
  (void)IsRegistered;
  (void)PreCondScope.Privatize();
  // Initialize the internal counter to 0 to calculate initial values of the
  // real counters.
  LValue IV = CGF.EmitLValue(S.getIterationVariable());
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(
          IV.getAddress()->getType()->getPointerElementType()),
      CGF.EmitLValue(S.getIterationVariable()), /*isInit=*/true);
  // Get initial values of the real counters.
  for (auto I : S.updates()) {
    CGF.EmitIgnoredExpr(I);
  }
  // Check that the loop is executed at least once.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

static void
EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
                      CodeGenFunction::OMPPrivateScope &PrivateScope) {
  for (auto &&I = D.getClausesOfKind(OMPC_linear); I; ++I) {
    auto *C = cast<OMPLinearClause>(*I);
    for (auto *E : C->varlists()) {
      auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> llvm::Value * {
        // Emit var without initialization.
        auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
        CGF.EmitAutoVarCleanups(VarEmission);
        return VarEmission.getAllocatedAddress();
      });
      assert(IsRegistered && "linear var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
    }
  }
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    // Pragma 'simd' code depends on presence of 'lastprivate'.
    // If present, we have to separate the last iteration of the loop:
    //
    // if (PreCond) {
    //   for (IV in 0..LastIteration-1) BODY;
    //   BODY with updates of lastprivate vars;
    //   <Final counter/linear vars updates>;
    // }
    //
    // otherwise (when there's no lastprivate):
    //
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }
    // Walk clauses and process safelen/lastprivate.
    bool SeparateIter = false;
    CGF.LoopStack.setParallel();
    CGF.LoopStack.setVectorizerEnable(true);
    for (auto C : S.clauses()) {
      switch (C->getClauseKind()) {
      case OMPC_safelen: {
        RValue Len = CGF.EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
                                     AggValueSlot::ignored(), true);
        llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
        CGF.LoopStack.setVectorizerWidth(Val->getZExtValue());
        // In presence of finite 'safelen', it may be unsafe to mark all
        // the memory instructions parallel, because loop-carried
        // dependences of 'safelen' iterations are possible.
        CGF.LoopStack.setParallel(false);
        break;
      }
      case OMPC_aligned:
        EmitOMPAlignedClause(CGF, CGF.CGM, cast<OMPAlignedClause>(*C));
        break;
      case OMPC_lastprivate:
        SeparateIter = true;
        break;
      default:
        // Not handled yet.
        ;
      }
    }

    // Emit inits for the linear variables.
    for (auto &&I = S.getClausesOfKind(OMPC_linear); I; ++I) {
      auto *C = cast<OMPLinearClause>(*I);
      for (auto Init : C->inits()) {
        auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
        CGF.EmitVarDecl(*D);
      }
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    for (auto &&I = S.getClausesOfKind(OMPC_linear); I; ++I) {
      auto *C = cast<OMPLinearClause>(*I);
      if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
        if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
          CGF.EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
          // Emit calculation of the linear step.
          CGF.EmitIgnoredExpr(CS);
        }
    }

    {
      OMPPrivateScope LoopScope(CGF);
      EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
      EmitPrivateLinearVars(CGF, S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                           S.getCond(SeparateIter), S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S);
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      if (SeparateIter) {
        CGF.EmitOMPLoopBody(S, /*SeparateIter=*/true);
      }
    }
    CGF.EmitOMPSimdFinal(S);
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
}

void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                          const OMPLoopDirective &S,
                                          OMPPrivateScope &LoopScope,
                                          llvm::Value *LB, llvm::Value *UB,
                                          llvm::Value *ST, llvm::Value *IL,
                                          llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool Dynamic = RT.isDynamic(ScheduleKind);

  assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the threads
  // in the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  RT.emitForInit(
      *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
      (Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST,
      Chunk);

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  if (!Dynamic) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond(false));
  } else {
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
                                 IL, LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (Dynamic)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  bool DynamicWithOrderedClause =
      Dynamic && S.getSingleClause(OMPC_ordered) != nullptr;
  SourceLocation Loc = S.getLocStart();
  EmitOMPInnerLoop(
      S, LoopScope.requiresCleanups(), S.getCond(/*SeparateIter=*/false),
      S.getInc(),
      [&S](CodeGenFunction &CGF) {
        CGF.EmitOMPLoopBody(S);
        CGF.EmitStopPoint(&S);
      },
      [DynamicWithOrderedClause, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
        if (DynamicWithOrderedClause) {
          CGF.CGM.getOpenMPRuntime().emitForOrderedDynamicIterationEnd(
              CGF, Loc, IVSize, IVSigned);
        }
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!Dynamic) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
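  // (Only static schedules need the explicit fini call; a dynamic dispatch
  // loop terminates when the runtime's dispatch-next call returns false.)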
  if (!Dynamic)
    RT.emitForStaticFinish(*this, S.getLocEnd());
}

/// \brief Emit a helper variable and return the corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                               OMPD_unknown);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
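      // (If no 'schedule' clause is present, ScheduleKind stays 'unknown',
      // which the runtime lowering effectively treats as the default
      // non-chunked 'static' schedule.)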
      auto ScheduleKind = OMPC_SCHEDULE_unknown;
      llvm::Value *Chunk = nullptr;
      if (auto C = cast_or_null<OMPScheduleClause>(
              S.getSingleClause(OMPC_schedule))) {
        ScheduleKind = C->getScheduleKind();
        if (auto Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk
        // is distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                       IL.getAddress(), LB.getAddress(), UB.getAddress(),
                       ST.getAddress());
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                         S.getCond(/*SeparateIter=*/false), S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // the runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
                            UB.getAddress(), ST.getAddress(), IL.getAddress(),
                            Chunk);
      }
      EmitOMPReductionClauseFinal(S);
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
  return HasLastprivateClause;
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);

  // Emit an implicit barrier at the end.
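  // Even with 'nowait', lastprivates require a barrier so that the final
  // values written by the thread executing the last iteration are visible
  // before any thread leaves the region.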
  if (!S.getSingleClause(OMPC_nowait) || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
}

static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeNaturalAlignAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitScalarInit(Init, LVal);
  return LVal;
}

static OpenMPDirectiveKind emitSections(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  if (CS && CS->size() > 1) {
    bool HasLastprivates = false;
    auto &&CodeGen = [&S, CS, &HasLastprivates](CodeGenFunction &CGF) {
      auto &C = CGF.CGM.getContext();
      auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
      // Emit helper vars inits.
      LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                    CGF.Builder.getInt32(0));
      auto *GlobalUBVal = CGF.Builder.getInt32(CS->size() - 1);
      LValue UB =
          createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
      LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                    CGF.Builder.getInt32(1));
      LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                    CGF.Builder.getInt32(0));
      // Loop counter.
      LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
      OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
      OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
      // Generate condition for loop.
      BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                          OK_Ordinary, S.getLocStart(),
                          /*fpContractable=*/false);
      // Increment for loop counter.
      UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue,
                        OK_Ordinary, S.getLocStart());
      auto BodyGen = [CS, &S, &IV](CodeGenFunction &CGF) {
        // Iterate through all sections and emit a switch construct:
        // switch (IV) {
        // case 0:
        //   <SectionStmt[0]>;
        //   break;
        // ...
        // case <NumSection> - 1:
        //   <SectionStmt[<NumSection> - 1]>;
        //   break;
        // }
        // .omp.sections.exit:
        auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
        auto *SwitchStmt = CGF.Builder.CreateSwitch(
            CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
            CS->size());
        unsigned CaseNumber = 0;
        for (auto C = CS->children(); C; ++C, ++CaseNumber) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(*C);
          CGF.EmitBranch(ExitBB);
        }
        CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
      };

      CodeGenFunction::OMPPrivateScope LoopScope(CGF);
      if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables.
        CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                                   OMPD_unknown);
      }
      CGF.EmitOMPPrivateClause(S, LoopScope);
      HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();

      // Emit static non-chunked loop.
      CGF.CGM.getOpenMPRuntime().emitForInit(
          CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
          /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
          ST.getAddress());
      // UB = min(UB, GlobalUB);
      auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
      auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
          CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
      CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
      // IV = LB;
      CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
      // while (idx <= UB) { BODY; ++idx; }
      CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                           [](CodeGenFunction &) {});
      // Tell the runtime we are done.
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart());
      CGF.EmitOMPReductionClauseFinal(S);

      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivates)
        CGF.EmitOMPLastprivateClauseFinal(
            S, CGF.Builder.CreateIsNotNull(
                   CGF.EmitLoadOfScalar(IL, S.getLocStart())));
    };

    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, CodeGen);
    // Emit barrier for lastprivates only if the 'sections' directive has a
    // 'nowait' clause. Otherwise the barrier will be generated by the codegen
    // for the directive.
    if (HasLastprivates && S.getSingleClause(OMPC_nowait)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                                 OMPD_unknown);
    }
    return OMPD_sections;
  }
  // If only one section is found, there is no need to generate a loop; emit it
  // as a single region.
  bool HasFirstprivates;
  // No need to generate reductions for a sections directive with a single
  // section region; we can use the original shared variables for all
  // operations.
  bool HasReductions = !S.getClausesOfKind(OMPC_reduction).empty();
  // No need to generate lastprivates for a sections directive with a single
  // section region; we can use the original shared variable for all
  // calculations with a barrier at the end of the sections.
  bool HasLastprivates = !S.getClausesOfKind(OMPC_lastprivate).empty();
  auto &&CodeGen = [Stmt, &S, &HasFirstprivates](CodeGenFunction &CGF) {
    CodeGenFunction::OMPPrivateScope SingleScope(CGF);
    HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();

    CGF.EmitStmt(Stmt);
    CGF.EnsureInsertPoint();
  };
  CGF.CGM.getOpenMPRuntime().emitSingleRegion(CGF, CodeGen, S.getLocStart(),
                                              llvm::None, llvm::None,
                                              llvm::None, llvm::None);
  // Emit barrier for firstprivates, lastprivates or reductions only if the
  // 'sections' directive has a 'nowait' clause. Otherwise the barrier will be
  // generated by the codegen for the directive.
  if ((HasFirstprivates || HasLastprivates || HasReductions) &&
      S.getSingleClause(OMPC_nowait)) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                               OMPD_unknown);
  }
  return OMPD_single;
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  OpenMPDirectiveKind EmittedAs = emitSections(*this, S);
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait)) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs);
  }
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this 'single'
  // construct. Build a list of copyprivate variables along with helper
  // expressions (<source>, <destination>, <destination> = <source>
  // expressions).
  for (auto &&I = S.getClausesOfKind(OMPC_copyprivate); I; ++I) {
    auto *C = cast<OMPCopyprivateClause>(*I);
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  LexicalScope Scope(*this, S.getSourceRange());
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  bool HasFirstprivates;
  auto &&CodeGen = [&S, &HasFirstprivates](CodeGenFunction &CGF) {
    CodeGenFunction::OMPPrivateScope SingleScope(CGF);
    HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();

    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                          CopyprivateVars, DestExprs, SrcExprs,
                                          AssignmentOps);
  // Emit an implicit barrier at the end (to avoid a data race on firstprivate
  // init, or if no 'nowait' clause was specified and no 'copyprivate' clause
  // is present).
  if ((!S.getSingleClause(OMPC_nowait) || HasFirstprivates) &&
      CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getLocStart(),
        S.getSingleClause(OMPC_nowait) ? OMPD_unknown : OMPD_single);
  }
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitCriticalRegion(
      *this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitOMPWorksharingLoop(S);
    // Emit implicit barrier at the end of the parallel region, but this
    // barrier is at the end of the 'for' directive, so emit it as the implicit
    // barrier for this 'for' directive.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                               OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet.");
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    (void)emitSections(CGF, S);
    // Emit implicit barrier at the end of the parallel region.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                               OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  LexicalScope Scope(*this, S.getSourceRange());
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >= 0 for untied tasks).
  auto &&CodeGen = [PartId, &S](CodeGenFunction &CGF) {
    if (*PartId) {
      // TODO: emit code for untied tasks.
    }
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  auto OutlinedFn =
      CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, CodeGen);
  // Check if we should emit a tied or untied task.
  bool Tied = !S.getSingleClause(OMPC_untied);
  // Check if the task is final.
  llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
  if (auto *Clause = S.getSingleClause(OMPC_final)) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = cast<OMPFinalClause>(Clause)->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Final.setInt(CondConstant);
    else
      Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Final.setInt(/*IntVal=*/false);
  }
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  if (auto C = S.getSingleClause(OMPC_if)) {
    IfCond = cast<OMPIfClause>(C)->getCondition();
  }
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get the list of private variables.
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> PrivateCopies;
  for (auto &&I = S.getClausesOfKind(OMPC_private); I; ++I) {
    auto *C = cast<OMPPrivateClause>(*I);
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Privates.push_back(*IRef);
        PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get the list of firstprivate variables.
  llvm::SmallVector<const Expr *, 8> FirstprivateVars;
  llvm::SmallVector<const Expr *, 8> FirstprivateCopies;
  llvm::SmallVector<const Expr *, 8> FirstprivateInits;
  for (auto &&I = S.getClausesOfKind(OMPC_firstprivate); I; ++I) {
    auto *C = cast<OMPFirstprivateClause>(*I);
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        FirstprivateVars.push_back(*IRef);
        FirstprivateCopies.push_back(IInit);
        FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef, ++IElemInitRef;
    }
  }
  CGM.getOpenMPRuntime().emitTaskCall(
      *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy,
      CapturedStruct, IfCond, Privates, PrivateCopies, FirstprivateVars,
      FirstprivateCopies, FirstprivateInits);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) {
      auto FlushClause = cast<OMPFlushClause>(C);
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

void

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart());
}

static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal =
        CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  } else {
    CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? llvm::SequentiallyConsistent
                                             : llvm::Monotonic,
                        LVal.isVolatile(), /*IsInit=*/false);
  }
}

static void emitSimpleStore(CodeGenFunction &CGF, LValue LVal, RValue RVal,
                            QualType RValTy) {
  switch (CGF.getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    CGF.EmitStoreThroughLValue(
        RValue::get(convertToScalarValue(CGF, RVal, RValTy, LVal.getType())),
        LVal);
    break;
  case TEK_Complex:
    CGF.EmitStoreOfComplex(
        convertToComplexValue(CGF, RVal, RValTy, LVal.getType()), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}
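
// A minimal sketch of the 'read' form lowered by EmitOMPAtomicReadExpr below
// (hypothetical variables v and x):
//
//   int v, x;
//   #pragma omp atomic read seq_cst
//   v = x;   // x is loaded atomically (seq_cst here, monotonic otherwise);
//            // the result is converted and stored to v via emitSimpleStore.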

static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(XLValue, Loc,
                                        IsSeqCst ? llvm::SequentiallyConsistent
                                                 : llvm::Monotonic,
                                        XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  emitSimpleStore(CGF, VLValue, Res, X->getType().getNonReferenceType());
}

static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
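
// Where the operand types allow it, the helper below lowers an atomic update
// to a single LLVM 'atomicrmw' instruction instead of a compare-and-exchange
// loop. A sketch of the mapping, assuming 'x' is an integer lvalue of a size
// the target supports natively:
//
//   #pragma omp atomic update
//   x += n;    // -> atomicrmw add
//   #pragma omp atomic update
//   x |= n;    // -> atomicrmw or (sub/and/xor/xchg are analogous)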

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomics of the given size are
  // supported by the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress()->getType()->getPointerElementType())) ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress()->getType()->getPointerElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  //   x binop= expr;     -> xrval binop expr;
  //   x++, ++x           -> xrval + 1;
  //   x--, --x           -> xrval - 1;
  //   x = x binop expr;  -> xrval binop expr;
  //   x = expr binop x;  -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform the compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}
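
// When no 'atomicrmw' form applies (e.g. a floating-point or non-simple 'x'),
// EmitOMPAtomicSimpleUpdateExpr above falls back to EmitAtomicUpdate, which
// conceptually performs a compare-and-exchange loop. A sketch only, not the
// exact IR that is emitted:
//
//   old = atomic load of x
//   loop:
//     desired = CommonGen(old)   // 'xrval' binop 'expr', or the reverse
//     old, success = cmpxchg(x, /*expected=*/old, desired)
//     if (!success) goto loop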

static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  //   x binop= expr;     -> xrval binop expr;
  //   x++, ++x           -> xrval + 1;
  //   x--, --x           -> xrval - 1;
  //   x = x binop expr;  -> xrval binop expr;
  //   x = expr binop x;  -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(convertToScalarValue(CGF, Value, SourceType, ResType));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}
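
// Source forms of 'atomic capture' handled below; 'v' receives either the old
// or the new value of 'x' depending on the form (hypothetical variables):
//
//   #pragma omp atomic capture
//   v = x++;                 // postfix update: v gets the old value of x
//
//   #pragma omp atomic capture
//   { x += expr; v = x; }    // v gets the new value of x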

static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    //   x binop= expr;     -> xrval binop expr;
    //   x++, ++x           -> xrval + 1;
    //   x--, --x           -> xrval - 1;
    //   x = x binop expr;  -> xrval binop expr;
    //   x = expr binop x;  -> expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // An 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use the old value returned by 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not return the new value, so re-evaluate the
        // update expression using the old value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType());
    auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform an atomicrmw xchg; otherwise fall back to a simple
    // exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // An 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit the post-update store to 'v' of the old/new 'x' value.
  emitSimpleStore(CGF, VLValue, NewVVal, NewVValType);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_mergeable:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
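
// The dispatch above is driven by the first non-seq_cst clause on the
// directive, for example:
//
//   #pragma omp atomic read seq_cst  // Kind == OMPC_read, IsSeqCst == true
//   #pragma omp atomic               // no clause: lowered as an update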

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find the first clause (skip the seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
    enterFullExpression(EWC);
  }
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const auto *C : Compound->body()) {
      if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
        enterFullExpression(EWC);
      }
    }
  }

  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) {
    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
                      S.getV(), S.getExpr(), S.getUpdateExpr(),
                      S.isXLHSInRHSPart(), S.getLocStart());
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
  llvm_unreachable("CodeGen for 'omp target' is not supported yet.");
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
}