//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
using namespace clang;
using namespace CodeGen;

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
///   CodeGen(true);
/// } else {
///   CodeGen(false);
/// }
static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                            const std::function<void(bool)> &CodeGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    CodeGen(CondConstant);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then");
  auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else");
  auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  CodeGen(/*ThenBlock*/ true);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  {
    // There is no need to emit line number for unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ElseBlock);
  }
  CodeGen(/*ThenBlock*/ false);
  {
    // There is no need to emit line number for unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBranch(ContBlock);
  }
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished*/ true);
}

/// \brief Emits copy-initialization of a private variable from the address of
/// the original variable \a OriginalAddr into \a PrivateAddr: a simple
/// aggregate copy when \a AssignExpr is not a (non-trivial) CXXConstructExpr,
/// otherwise an element-by-element initialization loop over the array, with
/// \a VDInit temporarily mapped to the current source element so that
/// \a AssignExpr can reference it.
void CodeGenFunction::EmitOMPAggregateAssign(LValue OriginalAddr,
                                             llvm::Value *PrivateAddr,
                                             const Expr *AssignExpr,
                                             QualType OriginalType,
                                             const VarDecl *VDInit) {
  EmitBlock(createBasicBlock(".omp.assign.begin."));
  if (!isa<CXXConstructExpr>(AssignExpr) || isTrivialInitializer(AssignExpr)) {
    // Perform simple memcpy.
    EmitAggregateAssign(PrivateAddr, OriginalAddr.getAddress(),
                        AssignExpr->getType());
  } else {
    // Perform element-by-element initialization.
    QualType ElementTy;
    auto SrcBegin = OriginalAddr.getAddress();
    auto DestBegin = PrivateAddr;
    auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
    auto SrcNumElements = emitArrayLength(ArrayTy, ElementTy, SrcBegin);
    auto DestNumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
    auto SrcEnd = Builder.CreateGEP(SrcBegin, SrcNumElements);
    auto DestEnd = Builder.CreateGEP(DestBegin, DestNumElements);
    // The basic structure here is a do-while loop, because we don't
    // need to check for the zero-element case.  The loop walks BACKWARDS
    // from the one-past-the-end pointers towards the begin pointers.
    auto BodyBB = createBasicBlock("omp.arraycpy.body");
    auto DoneBB = createBasicBlock("omp.arraycpy.done");
    auto IsEmpty =
        Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
    Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

    // Enter the loop body, making that address the current address.
    auto EntryBB = Builder.GetInsertBlock();
    EmitBlock(BodyBB);
    // PHIs hold the one-past-the-current-element addresses; they get a second
    // incoming value from the loop back-edge below.
    auto SrcElementPast = Builder.CreatePHI(SrcBegin->getType(), 2,
                                            "omp.arraycpy.srcElementPast");
    SrcElementPast->addIncoming(SrcEnd, EntryBB);
    auto DestElementPast = Builder.CreatePHI(DestBegin->getType(), 2,
                                             "omp.arraycpy.destElementPast");
    DestElementPast->addIncoming(DestEnd, EntryBB);

    // Shift the address back by one element.
    auto NegativeOne = llvm::ConstantInt::get(SizeTy, -1, true);
    auto DestElement = Builder.CreateGEP(DestElementPast, NegativeOne,
                                         "omp.arraycpy.dest.element");
    auto SrcElement = Builder.CreateGEP(SrcElementPast, NegativeOne,
                                        "omp.arraycpy.src.element");
    {
      // Create RunCleanScope to cleanup possible temps.
      CodeGenFunction::RunCleanupsScope Init(*this);
      // Emit initialization for single element.  VDInit is mapped to the
      // current source element for the duration of the initializer emission.
      LocalDeclMap[VDInit] = SrcElement;
      EmitAnyExprToMem(AssignExpr, DestElement,
                       AssignExpr->getType().getQualifiers(),
                       /*IsInitializer*/ false);
      LocalDeclMap.erase(VDInit);
    }

    // Check whether we've reached the end.
    auto Done =
        Builder.CreateICmpEQ(DestElement, DestBegin, "omp.arraycpy.done");
    Builder.CreateCondBr(Done, DoneBB, BodyBB);
    // Wire the back-edge values into the PHIs.
    DestElementPast->addIncoming(DestElement, Builder.GetInsertBlock());
    SrcElementPast->addIncoming(SrcElement, Builder.GetInsertBlock());

    // Done.
    EmitBlock(DoneBB, true);
  }
  EmitBlock(createBasicBlock(".omp.assign.end."));
}

/// \brief Emits private copies for the variables of the 'firstprivate'
/// clauses of \a D and registers them in \a PrivateScope.  Variables with a
/// non-null entry in the clause's inits() (arrays) are copy-initialized
/// element-wise from the captured original via EmitOMPAggregateAssign;
/// all others are emitted as ordinary local declarations with copy init.
void CodeGenFunction::EmitOMPFirstprivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_firstprivate;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter); I; ++I) {
    auto *C = cast<OMPFirstprivateClause>(*I);
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      bool IsRegistered;
      if (*InitsRef != nullptr) {
        // Emit VarDecl with copy init for arrays.  The original is read from
        // the captured-struct field of the enclosing captured statement.
        auto *FD = CapturedStmtInfo->lookup(OrigVD);
        LValue Base = MakeNaturalAlignAddrLValue(
            CapturedStmtInfo->getContextValue(),
            getContext().getTagDeclType(FD->getParent()));
        auto OriginalAddr = EmitLValueForField(Base, FD);
        auto VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
          auto Emission = EmitAutoVarAlloca(*VD);
          // Emit initialization of aggregate firstprivate vars.
          EmitOMPAggregateAssign(OriginalAddr, Emission.getAllocatedAddress(),
                                 VD->getInit(), (*IRef)->getType(), VDInit);
          EmitAutoVarCleanups(Emission);
          return Emission.getAllocatedAddress();
        });
      } else
        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
      assert(IsRegistered && "firstprivate var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++IRef, ++InitsRef;
    }
  }
}

/// \brief Emits private copies for the variables of the 'private' clauses of
/// \a D and registers them in \a PrivateScope.
void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_private;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter); I; ++I) {
    auto *C = cast<OMPPrivateClause>(*I);
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      bool IsRegistered =
          PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
      assert(IsRegistered && "private var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++IRef;
    }
  }
}

/// \brief Emits initialization for the 'reduction' clauses of \a D: maps each
/// clause's LHS helper variable to the address of the original variable, and
/// registers a private copy (with the reduction's init) for each original
/// variable in \a PrivateScope.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto ReductionFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_reduction;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           ReductionFilter)> I(D.clauses(), ReductionFilter);
       I; ++I) {
    auto *C = cast<OMPReductionClause>(*I);
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (auto IRef : C->varlists()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> llvm::Value *{
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        IRef->getType(), VK_LValue, IRef->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
      // Emit reduction copy.
      bool IsRegistered =
          PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> llvm::Value *{
            // Emit private VarDecl with reduction init.
            EmitDecl(*PrivateVD);
            return GetAddrOfLocalVar(PrivateVD);
          });
      assert(IsRegistered && "private var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++ILHS, ++IRHS;
    }
  }
}

/// \brief Collects the LHS/RHS expressions and reduction operations from all
/// 'reduction' clauses of \a D and, if any are present, emits the final
/// runtime reduction call.
void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D) {
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  auto ReductionFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_reduction;
  };
  bool HasAtLeastOneReduction = false;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           ReductionFilter)> I(D.clauses(), ReductionFilter);
       I; ++I) {
    HasAtLeastOneReduction = true;
    auto *C = cast<OMPReductionClause>(*I);
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), LHSExprs, RHSExprs, ReductionOps,
        D.getSingleClause(OMPC_nowait) ||
            isOpenMPParallelDirective(D.getDirectiveKind()));
  }
}

/// \brief Emits code for OpenMP parallel directive in the parallel region.
static void emitOMPParallelCall(CodeGenFunction &CGF,
                                const OMPExecutableDirective &S,
                                llvm::Value *OutlinedFn,
                                llvm::Value *CapturedStruct) {
  if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedStruct);
}

/// \brief Outlines the body of a parallel directive \a S using \a CodeGen and
/// emits either a parallel runtime call or, when an 'if' clause evaluates to
/// false, a serial call.
static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                           const OMPExecutableDirective &S,
                                           const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), CodeGen);
  if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
    auto Cond = cast<OMPIfClause>(C)->getCondition();
    EmitOMPIfClause(CGF, Cond, [&](bool ThenBlock) {
      if (ThenBlock)
        emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
      else
        CGF.CGM.getOpenMPRuntime().emitSerialCall(CGF, S.getLocStart(),
                                                  OutlinedFn, CapturedStruct);
    });
  } else
    emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
}

/// \brief Emits code for '#pragma omp parallel': privatization of the
/// private/firstprivate/reduction variables, the captured region body, and
/// the final reduction, with implicit barriers.
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    if (PrivateScope.Privatize())
      // Emit implicit barrier to synchronize threads and avoid data races.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                                 OMPD_unknown);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S);
    // Emit implicit barrier at the end of the 'parallel' directive.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                               OMPD_unknown);
  };
  emitCommonOMPParallelDirective(*this, S, CodeGen);
}

/// \brief Emits the body of an OpenMP loop directive: counter and linear
/// variable updates, the statement body itself, and the 'continue' block that
/// 'continue' statements in the body branch to.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
                                      bool SeparateIter) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (auto I : S.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    for (auto U : C->updates()) {
      EmitIgnoredExpr(U);
    }
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue));
  // Emit loop body.
  EmitStmt(S.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (SeparateIter) {
    // TODO: Update lastprivates if the SeparateIter flag is true.
    // This will be implemented in a follow-up OMPLastprivateClause patch, but
    // result should be still correct without it, as we do not make these
    // variables private yet.
  }
}

/// \brief Emits the inner loop of an OpenMP loop directive:
/// while (LoopCond) { BodyGen(); IncExpr; }
/// \param RequiresCleanup If true, an extra exit block is created to stage a
/// loop exit through pending cleanups.
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
  auto Cnt = getPGORegionCounter(&S);

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, Cnt.getCount());
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  Cnt.beginRegion(Builder);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

/// \brief Emits the final values of the loop counters (only for those still
/// alive in LocalDeclMap) and of the linear variables after a simd loop.
void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) {
  auto IC = S.counters().begin();
  for (auto F : S.finals()) {
    if (LocalDeclMap.lookup(cast<DeclRefExpr>((*IC))->getDecl())) {
      EmitIgnoredExpr(F);
    }
    ++IC;
  }
  // Emit the final values of the linear variables.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    for (auto F : C->finals()) {
      EmitIgnoredExpr(F);
    }
  }
}

/// \brief Emits alignment assumptions for all pointers listed in an 'aligned'
/// clause, using the clause's alignment expression if it is given and nonzero,
/// otherwise the target's default SIMD alignment for the pointee type.
static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM,
                                 const OMPAlignedClause &Clause) {
  unsigned ClauseAlignment = 0;
  if (auto AlignmentExpr = Clause.getAlignment()) {
    auto AlignmentCI =
        cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
    ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
  }
  for (auto E : Clause.varlists()) {
    unsigned Alignment = ClauseAlignment;
    if (Alignment == 0) {
      // OpenMP [2.8.1, Description]
      // If no optional parameter is specified, implementation-defined default
      // alignments for SIMD instructions on the target platforms are assumed.
      Alignment = CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
          E->getType());
    }
    assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
           "alignment is not power of 2");
    if (Alignment != 0) {
      llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
      CGF.EmitAlignmentAssumption(PtrValue, Alignment);
    }
  }
}

/// \brief Registers uninitialized private copies for the given loop counter
/// variables in \a LoopScope.
static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters) {
  for (auto *E : Counters) {
    auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    bool IsRegistered = LoopScope.addPrivate(VD, [&]() -> llvm::Value * {
      // Emit var without initialization.
      auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
      CGF.EmitAutoVarCleanups(VarEmission);
      return VarEmission.getAllocatedAddress();
    });
    assert(IsRegistered && "counter already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;
  }
}

/// \brief Registers uninitialized private copies for the variables of all
/// 'linear' clauses of \a D in \a PrivateScope.
static void
EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
                      CodeGenFunction::OMPPrivateScope &PrivateScope) {
  for (auto Clause : OMPExecutableDirective::linear_filter(D.clauses())) {
    for (auto *E : Clause->varlists()) {
      auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      bool IsRegistered = PrivateScope.addPrivate(VD, [&]()->llvm::Value * {
        // Emit var without initialization.
        auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
        CGF.EmitAutoVarCleanups(VarEmission);
        return VarEmission.getAllocatedAddress();
      });
      assert(IsRegistered && "linear var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
    }
  }
}

/// \brief Emits code for '#pragma omp simd' as an inlined region: processes
/// safelen/aligned/lastprivate clauses, sets up loop metadata and private
/// counters/linear vars, and emits the vectorizable inner loop.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    // Pragma 'simd' code depends on presence of 'lastprivate'.
    // If present, we have to separate last iteration of the loop:
    //
    // if (LastIteration != 0) {
    //   for (IV in 0..LastIteration-1) BODY;
    //   BODY with updates of lastprivate vars;
    //   <Final counter/linear vars updates>;
    // }
    //
    // otherwise (when there's no lastprivate):
    //
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    //

    // Walk clauses and process safelen/lastprivate.
    bool SeparateIter = false;
    CGF.LoopStack.setParallel();
    CGF.LoopStack.setVectorizerEnable(true);
    for (auto C : S.clauses()) {
      switch (C->getClauseKind()) {
      case OMPC_safelen: {
        RValue Len = CGF.EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
                                     AggValueSlot::ignored(), true);
        llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
        CGF.LoopStack.setVectorizerWidth(Val->getZExtValue());
        // In presence of finite 'safelen', it may be unsafe to mark all
        // the memory instructions parallel, because loop-carried
        // dependences of 'safelen' iterations are possible.
        CGF.LoopStack.setParallel(false);
        break;
      }
      case OMPC_aligned:
        EmitOMPAlignedClause(CGF, CGF.CGM, cast<OMPAlignedClause>(*C));
        break;
      case OMPC_lastprivate:
        SeparateIter = true;
        break;
      default:
        // Not handled yet
        ;
      }
    }

    // Emit inits for the linear variables.
    for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
      for (auto Init : C->inits()) {
        auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
        CGF.EmitVarDecl(*D);
      }
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
      if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
        if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
          CGF.EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
          // Emit calculation of the linear step.
          CGF.EmitIgnoredExpr(CS);
        }
    }

    if (SeparateIter) {
      // Emit: if (LastIteration > 0) - begin.
      RegionCounter Cnt = CGF.getPGORegionCounter(&S);
      auto ThenBlock = CGF.createBasicBlock("simd.if.then");
      auto ContBlock = CGF.createBasicBlock("simd.if.end");
      CGF.EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock,
                               Cnt.getCount());
      CGF.EmitBlock(ThenBlock);
      Cnt.beginRegion(CGF.Builder);
      // Emit 'then' code.
      {
        OMPPrivateScope LoopScope(CGF);
        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
        EmitPrivateLinearVars(CGF, S, LoopScope);
        CGF.EmitOMPPrivateClause(S, LoopScope);
        (void)LoopScope.Privatize();
        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                             S.getCond(/*SeparateIter=*/true), S.getInc(),
                             [&S](CodeGenFunction &CGF) {
                               CGF.EmitOMPLoopBody(S);
                               CGF.EmitStopPoint(&S);
                             });
        // Emit the (separated) last iteration's body.
        CGF.EmitOMPLoopBody(S, /* SeparateIter */ true);
      }
      CGF.EmitOMPSimdFinal(S);
      // Emit: if (LastIteration != 0) - end.
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    } else {
      {
        OMPPrivateScope LoopScope(CGF);
        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
        EmitPrivateLinearVars(CGF, S, LoopScope);
        CGF.EmitOMPPrivateClause(S, LoopScope);
        (void)LoopScope.Privatize();
        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                             S.getCond(/*SeparateIter=*/false), S.getInc(),
                             [&S](CodeGenFunction &CGF) {
                               CGF.EmitOMPLoopBody(S);
                               CGF.EmitStopPoint(&S);
                             });
      }
      CGF.EmitOMPSimdFinal(S);
    }
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
}

/// \brief Emits the outer dispatch loop for worksharing loops whose schedule
/// is not static non-chunked (dynamic, guided, auto, runtime, or chunked
/// static): repeatedly obtains a chunk [LB..UB] from the runtime (or advances
/// LB/UB by the stride for chunked static) and runs the inner loop over it.
void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                          const OMPLoopDirective &S,
                                          OMPPrivateScope &LoopScope,
                                          llvm::Value *LB, llvm::Value *UB,
                                          llvm::Value *ST, llvm::Value *IL,
                                          llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool Dynamic = RT.isDynamic(ScheduleKind);

  assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  RT.emitForInit(
      *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
      (Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST,
      Chunk);

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  if (!Dynamic) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond(false));
  } else {
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
                                 IL, LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (Dynamic)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                   S.getCond(/*SeparateIter=*/false), S.getInc(),
                   [&S](CodeGenFunction &CGF) {
                     CGF.EmitOMPLoopBody(S);
                     CGF.EmitStopPoint(&S);
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!Dynamic) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  // FIXME: Also call fini for ordered loops with dynamic scheduling.
  if (!Dynamic)
    RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
}

/// \brief Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

/// \brief Emits a worksharing loop for \a S: the iteration variable and count,
/// the precondition check, the LB/UB/ST/IL helper variables, privatized loop
/// counters, and either the static non-chunked fast path or the outer dispatch
/// loop for all other schedules.
void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Check pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("omp.precond.then");
    auto ContBlock = createBasicBlock("omp.precond.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      auto ScheduleKind = OMPC_SCHEDULE_unknown;
      llvm::Value *Chunk = nullptr;
      if (auto C = cast_or_null<OMPScheduleClause>(
              S.getSingleClause(OMPC_schedule))) {
        ScheduleKind = C->getScheduleKind();
        if (auto Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk is
        // distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                       IL.getAddress(), LB.getAddress(), UB.getAddress(),
                       ST.getAddress());
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                         S.getCond(/*SeparateIter=*/false), S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S);
                           CGF.EmitStopPoint(&S);
                         });
        // Tell the runtime we are done.
        RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
                            UB.getAddress(), ST.getAddress(), IL.getAddress(),
                            Chunk);
      }
    }
    // We're now done with the loop, so jump to the continuation block.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  }
}

/// \brief Emits code for '#pragma omp for' as an inlined worksharing loop,
/// followed by an implicit barrier unless a 'nowait' clause is present.
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen =
      [&S](CodeGenFunction &CGF) { CGF.EmitOMPWorksharingLoop(S); };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait)) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
}

/// \brief Creates a temporary variable of type \a Ty, optionally emitting a
/// scalar initializer \a Init into it, and returns its lvalue.
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeNaturalAlignAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitScalarInit(Init, LVal);
  return LVal;
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  if (CS && CS->size() > 1) {
    auto &&CodeGen = [&S, CS](CodeGenFunction &CGF) {
      auto &C = CGF.CGM.getContext();
      auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
      // Emit helper vars inits.
      LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                    CGF.Builder.getInt32(0));
      auto *GlobalUBVal = CGF.Builder.getInt32(CS->size() - 1);
      LValue UB =
          createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
      LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                    CGF.Builder.getInt32(1));
      LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                    CGF.Builder.getInt32(0));
      // Loop counter.
      LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
      OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
      OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
      OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
      OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
      // Generate condition for loop.
911 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 912 OK_Ordinary, S.getLocStart(), 913 /*fpContractable=*/false); 914 // Increment for loop counter. 915 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, 916 OK_Ordinary, S.getLocStart()); 917 auto BodyGen = [CS, &S, &IV](CodeGenFunction &CGF) { 918 // Iterate through all sections and emit a switch construct: 919 // switch (IV) { 920 // case 0: 921 // <SectionStmt[0]>; 922 // break; 923 // ... 924 // case <NumSection> - 1: 925 // <SectionStmt[<NumSection> - 1]>; 926 // break; 927 // } 928 // .omp.sections.exit: 929 auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 930 auto *SwitchStmt = CGF.Builder.CreateSwitch( 931 CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, 932 CS->size()); 933 unsigned CaseNumber = 0; 934 for (auto C = CS->children(); C; ++C, ++CaseNumber) { 935 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 936 CGF.EmitBlock(CaseBB); 937 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 938 CGF.EmitStmt(*C); 939 CGF.EmitBranch(ExitBB); 940 } 941 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 942 }; 943 // Emit static non-chunked loop. 944 CGF.CGM.getOpenMPRuntime().emitForInit( 945 CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32, 946 /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(), 947 ST.getAddress()); 948 // UB = min(UB, GlobalUB); 949 auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); 950 auto *MinUBGlobalUB = CGF.Builder.CreateSelect( 951 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 952 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 953 // IV = LB; 954 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); 955 // while (idx <= UB) { BODY; ++idx; } 956 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen); 957 // Tell the runtime we are done. 
958 CGF.CGM.getOpenMPRuntime().emitForFinish(CGF, S.getLocStart(), 959 OMPC_SCHEDULE_static); 960 }; 961 962 CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen); 963 } else { 964 // If only one section is found - no need to generate loop, emit as a 965 // single 966 // region. 967 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 968 CGF.EmitStmt( 969 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 970 CGF.EnsureInsertPoint(); 971 }; 972 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 973 llvm::None, llvm::None, llvm::None, 974 llvm::None); 975 } 976 977 // Emit an implicit barrier at the end. 978 if (!S.getSingleClause(OMPC_nowait)) { 979 CGM.getOpenMPRuntime().emitBarrierCall( 980 *this, S.getLocStart(), 981 (CS && CS->size() > 1) ? OMPD_sections : OMPD_single); 982 } 983 } 984 985 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 986 LexicalScope Scope(*this, S.getSourceRange()); 987 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 988 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 989 CGF.EnsureInsertPoint(); 990 }; 991 CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen); 992 } 993 994 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 995 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 996 llvm::SmallVector<const Expr *, 8> SrcExprs; 997 llvm::SmallVector<const Expr *, 8> DstExprs; 998 llvm::SmallVector<const Expr *, 8> AssignmentOps; 999 // Check if there are any 'copyprivate' clauses associated with this 1000 // 'single' 1001 // construct. 
1002 auto CopyprivateFilter = [](const OMPClause *C) -> bool { 1003 return C->getClauseKind() == OMPC_copyprivate; 1004 }; 1005 // Build a list of copyprivate variables along with helper expressions 1006 // (<source>, <destination>, <destination>=<source> expressions) 1007 typedef OMPExecutableDirective::filtered_clause_iterator<decltype( 1008 CopyprivateFilter)> CopyprivateIter; 1009 for (CopyprivateIter I(S.clauses(), CopyprivateFilter); I; ++I) { 1010 auto *C = cast<OMPCopyprivateClause>(*I); 1011 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 1012 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 1013 DstExprs.append(C->destination_exprs().begin(), 1014 C->destination_exprs().end()); 1015 AssignmentOps.append(C->assignment_ops().begin(), 1016 C->assignment_ops().end()); 1017 } 1018 LexicalScope Scope(*this, S.getSourceRange()); 1019 // Emit code for 'single' region along with 'copyprivate' clauses 1020 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1021 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1022 CGF.EnsureInsertPoint(); 1023 }; 1024 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 1025 CopyprivateVars, SrcExprs, DstExprs, 1026 AssignmentOps); 1027 // Emit an implicit barrier at the end. 
1028 if (!S.getSingleClause(OMPC_nowait)) { 1029 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_single); 1030 } 1031 } 1032 1033 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 1034 LexicalScope Scope(*this, S.getSourceRange()); 1035 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1036 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1037 CGF.EnsureInsertPoint(); 1038 }; 1039 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 1040 } 1041 1042 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 1043 LexicalScope Scope(*this, S.getSourceRange()); 1044 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1045 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1046 CGF.EnsureInsertPoint(); 1047 }; 1048 CGM.getOpenMPRuntime().emitCriticalRegion( 1049 *this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart()); 1050 } 1051 1052 void 1053 CodeGenFunction::EmitOMPParallelForDirective(const OMPParallelForDirective &) { 1054 llvm_unreachable("CodeGen for 'omp parallel for' is not supported yet."); 1055 } 1056 1057 void CodeGenFunction::EmitOMPParallelForSimdDirective( 1058 const OMPParallelForSimdDirective &) { 1059 llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet."); 1060 } 1061 1062 void CodeGenFunction::EmitOMPParallelSectionsDirective( 1063 const OMPParallelSectionsDirective &) { 1064 llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet."); 1065 } 1066 1067 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 1068 // Emit outlined function for task construct. 
1069 LexicalScope Scope(*this, S.getSourceRange()); 1070 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 1071 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 1072 auto *I = CS->getCapturedDecl()->param_begin(); 1073 auto *PartId = std::next(I); 1074 // The first function argument for tasks is a thread id, the second one is a 1075 // part id (0 for tied tasks, >=0 for untied task). 1076 auto &&CodeGen = [PartId, &S](CodeGenFunction &CGF) { 1077 if (*PartId) { 1078 // TODO: emit code for untied tasks. 1079 } 1080 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1081 }; 1082 auto OutlinedFn = 1083 CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, CodeGen); 1084 // Check if we should emit tied or untied task. 1085 bool Tied = !S.getSingleClause(OMPC_untied); 1086 // Check if the task is final 1087 llvm::PointerIntPair<llvm::Value *, 1, bool> Final; 1088 if (auto *Clause = S.getSingleClause(OMPC_final)) { 1089 // If the condition constant folds and can be elided, try to avoid emitting 1090 // the condition and the dead arm of the if/else. 1091 auto *Cond = cast<OMPFinalClause>(Clause)->getCondition(); 1092 bool CondConstant; 1093 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 1094 Final.setInt(CondConstant); 1095 else 1096 Final.setPointer(EvaluateExprAsBool(Cond)); 1097 } else { 1098 // By default the task is not final. 
1099 Final.setInt(/*IntVal=*/false); 1100 } 1101 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 1102 CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final, 1103 OutlinedFn, SharedsTy, CapturedStruct); 1104 } 1105 1106 void CodeGenFunction::EmitOMPTaskyieldDirective( 1107 const OMPTaskyieldDirective &S) { 1108 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); 1109 } 1110 1111 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 1112 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); 1113 } 1114 1115 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) { 1116 llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet."); 1117 } 1118 1119 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 1120 CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { 1121 if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) { 1122 auto FlushClause = cast<OMPFlushClause>(C); 1123 return llvm::makeArrayRef(FlushClause->varlist_begin(), 1124 FlushClause->varlist_end()); 1125 } 1126 return llvm::None; 1127 }(), S.getLocStart()); 1128 } 1129 1130 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) { 1131 llvm_unreachable("CodeGen for 'omp ordered' is not supported yet."); 1132 } 1133 1134 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 1135 QualType SrcType, QualType DestType) { 1136 assert(CGF.hasScalarEvaluationKind(DestType) && 1137 "DestType must have scalar evaluation kind."); 1138 assert(!Val.isAggregate() && "Must be a scalar or complex."); 1139 return Val.isScalar() 1140 ? 
CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType) 1141 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 1142 DestType); 1143 } 1144 1145 static CodeGenFunction::ComplexPairTy 1146 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 1147 QualType DestType) { 1148 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 1149 "DestType must have complex evaluation kind."); 1150 CodeGenFunction::ComplexPairTy ComplexVal; 1151 if (Val.isScalar()) { 1152 // Convert the input element to the element type of the complex. 1153 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 1154 auto ScalarVal = 1155 CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType); 1156 ComplexVal = CodeGenFunction::ComplexPairTy( 1157 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 1158 } else { 1159 assert(Val.isComplex() && "Must be a scalar or complex."); 1160 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 1161 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 1162 ComplexVal.first = CGF.EmitScalarConversion( 1163 Val.getComplexVal().first, SrcElementType, DestElementType); 1164 ComplexVal.second = CGF.EmitScalarConversion( 1165 Val.getComplexVal().second, SrcElementType, DestElementType); 1166 } 1167 return ComplexVal; 1168 } 1169 1170 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 1171 const Expr *X, const Expr *V, 1172 SourceLocation Loc) { 1173 // v = x; 1174 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 1175 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 1176 LValue XLValue = CGF.EmitLValue(X); 1177 LValue VLValue = CGF.EmitLValue(V); 1178 RValue Res = XLValue.isGlobalReg() 1179 ? CGF.EmitLoadOfLValue(XLValue, Loc) 1180 : CGF.EmitAtomicLoad(XLValue, Loc, 1181 IsSeqCst ? 
llvm::SequentiallyConsistent 1182 : llvm::Monotonic, 1183 XLValue.isVolatile()); 1184 // OpenMP, 2.12.6, atomic Construct 1185 // Any atomic construct with a seq_cst clause forces the atomically 1186 // performed operation to include an implicit flush operation without a 1187 // list. 1188 if (IsSeqCst) 1189 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 1190 switch (CGF.getEvaluationKind(V->getType())) { 1191 case TEK_Scalar: 1192 CGF.EmitStoreOfScalar( 1193 convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue); 1194 break; 1195 case TEK_Complex: 1196 CGF.EmitStoreOfComplex( 1197 convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue, 1198 /*isInit=*/false); 1199 break; 1200 case TEK_Aggregate: 1201 llvm_unreachable("Must be a scalar or complex."); 1202 } 1203 } 1204 1205 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 1206 const Expr *X, const Expr *E, 1207 SourceLocation Loc) { 1208 // x = expr; 1209 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 1210 LValue XLValue = CGF.EmitLValue(X); 1211 RValue ExprRValue = CGF.EmitAnyExpr(E); 1212 if (XLValue.isGlobalReg()) 1213 CGF.EmitStoreThroughGlobalRegLValue(ExprRValue, XLValue); 1214 else 1215 CGF.EmitAtomicStore(ExprRValue, XLValue, 1216 IsSeqCst ? llvm::SequentiallyConsistent 1217 : llvm::Monotonic, 1218 XLValue.isVolatile(), /*IsInit=*/false); 1219 // OpenMP, 2.12.6, atomic Construct 1220 // Any atomic construct with a seq_cst clause forces the atomically 1221 // performed operation to include an implicit flush operation without a 1222 // list. 
1223 if (IsSeqCst) 1224 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 1225 } 1226 1227 bool emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, RValue Update, 1228 BinaryOperatorKind BO, llvm::AtomicOrdering AO, 1229 bool IsXLHSInRHSPart) { 1230 auto &Context = CGF.CGM.getContext(); 1231 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' 1232 // expression is simple and atomic is allowed for the given type for the 1233 // target platform. 1234 if (BO == BO_Comma || !Update.isScalar() || 1235 !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() || 1236 (!isa<llvm::ConstantInt>(Update.getScalarVal()) && 1237 (Update.getScalarVal()->getType() != 1238 X.getAddress()->getType()->getPointerElementType())) || 1239 !Context.getTargetInfo().hasBuiltinAtomic( 1240 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) 1241 return false; 1242 1243 llvm::AtomicRMWInst::BinOp RMWOp; 1244 switch (BO) { 1245 case BO_Add: 1246 RMWOp = llvm::AtomicRMWInst::Add; 1247 break; 1248 case BO_Sub: 1249 if (!IsXLHSInRHSPart) 1250 return false; 1251 RMWOp = llvm::AtomicRMWInst::Sub; 1252 break; 1253 case BO_And: 1254 RMWOp = llvm::AtomicRMWInst::And; 1255 break; 1256 case BO_Or: 1257 RMWOp = llvm::AtomicRMWInst::Or; 1258 break; 1259 case BO_Xor: 1260 RMWOp = llvm::AtomicRMWInst::Xor; 1261 break; 1262 case BO_LT: 1263 RMWOp = X.getType()->hasSignedIntegerRepresentation() 1264 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min 1265 : llvm::AtomicRMWInst::Max) 1266 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin 1267 : llvm::AtomicRMWInst::UMax); 1268 break; 1269 case BO_GT: 1270 RMWOp = X.getType()->hasSignedIntegerRepresentation() 1271 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max 1272 : llvm::AtomicRMWInst::Min) 1273 : (IsXLHSInRHSPart ? 
llvm::AtomicRMWInst::UMax 1274 : llvm::AtomicRMWInst::UMin); 1275 break; 1276 case BO_Mul: 1277 case BO_Div: 1278 case BO_Rem: 1279 case BO_Shl: 1280 case BO_Shr: 1281 case BO_LAnd: 1282 case BO_LOr: 1283 return false; 1284 case BO_PtrMemD: 1285 case BO_PtrMemI: 1286 case BO_LE: 1287 case BO_GE: 1288 case BO_EQ: 1289 case BO_NE: 1290 case BO_Assign: 1291 case BO_AddAssign: 1292 case BO_SubAssign: 1293 case BO_AndAssign: 1294 case BO_OrAssign: 1295 case BO_XorAssign: 1296 case BO_MulAssign: 1297 case BO_DivAssign: 1298 case BO_RemAssign: 1299 case BO_ShlAssign: 1300 case BO_ShrAssign: 1301 case BO_Comma: 1302 llvm_unreachable("Unsupported atomic update operation"); 1303 } 1304 auto *UpdateVal = Update.getScalarVal(); 1305 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) { 1306 UpdateVal = CGF.Builder.CreateIntCast( 1307 IC, X.getAddress()->getType()->getPointerElementType(), 1308 X.getType()->hasSignedIntegerRepresentation()); 1309 } 1310 CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO); 1311 return true; 1312 } 1313 1314 void CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( 1315 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, 1316 llvm::AtomicOrdering AO, SourceLocation Loc, 1317 const llvm::function_ref<RValue(RValue)> &CommonGen) { 1318 // Update expressions are allowed to have the following forms: 1319 // x binop= expr; -> xrval + expr; 1320 // x++, ++x -> xrval + 1; 1321 // x--, --x -> xrval - 1; 1322 // x = x binop expr; -> xrval binop expr 1323 // x = expr Op x; - > expr binop xrval; 1324 if (!emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart)) { 1325 if (X.isGlobalReg()) { 1326 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop 1327 // 'xrval'. 1328 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X); 1329 } else { 1330 // Perform compare-and-swap procedure. 
1331 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified()); 1332 } 1333 } 1334 } 1335 1336 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, 1337 const Expr *X, const Expr *E, 1338 const Expr *UE, bool IsXLHSInRHSPart, 1339 SourceLocation Loc) { 1340 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 1341 "Update expr in 'atomic update' must be a binary operator."); 1342 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 1343 // Update expressions are allowed to have the following forms: 1344 // x binop= expr; -> xrval + expr; 1345 // x++, ++x -> xrval + 1; 1346 // x--, --x -> xrval - 1; 1347 // x = x binop expr; -> xrval binop expr 1348 // x = expr Op x; - > expr binop xrval; 1349 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); 1350 LValue XLValue = CGF.EmitLValue(X); 1351 RValue ExprRValue = CGF.EmitAnyExpr(E); 1352 auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic; 1353 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 1354 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 1355 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 1356 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 1357 auto Gen = 1358 [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue { 1359 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 1360 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 1361 return CGF.EmitAnyExpr(UE); 1362 }; 1363 CGF.EmitOMPAtomicSimpleUpdateExpr(XLValue, ExprRValue, BOUE->getOpcode(), 1364 IsXLHSInRHSPart, AO, Loc, Gen); 1365 // OpenMP, 2.12.6, atomic Construct 1366 // Any atomic construct with a seq_cst clause forces the atomically 1367 // performed operation to include an implicit flush operation without a 1368 // list. 
1369 if (IsSeqCst) 1370 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 1371 } 1372 1373 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 1374 bool IsSeqCst, const Expr *X, const Expr *V, 1375 const Expr *E, const Expr *UE, 1376 bool IsXLHSInRHSPart, SourceLocation Loc) { 1377 switch (Kind) { 1378 case OMPC_read: 1379 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 1380 break; 1381 case OMPC_write: 1382 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 1383 break; 1384 case OMPC_unknown: 1385 case OMPC_update: 1386 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 1387 break; 1388 case OMPC_capture: 1389 llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet."); 1390 case OMPC_if: 1391 case OMPC_final: 1392 case OMPC_num_threads: 1393 case OMPC_private: 1394 case OMPC_firstprivate: 1395 case OMPC_lastprivate: 1396 case OMPC_reduction: 1397 case OMPC_safelen: 1398 case OMPC_collapse: 1399 case OMPC_default: 1400 case OMPC_seq_cst: 1401 case OMPC_shared: 1402 case OMPC_linear: 1403 case OMPC_aligned: 1404 case OMPC_copyin: 1405 case OMPC_copyprivate: 1406 case OMPC_flush: 1407 case OMPC_proc_bind: 1408 case OMPC_schedule: 1409 case OMPC_ordered: 1410 case OMPC_nowait: 1411 case OMPC_untied: 1412 case OMPC_threadprivate: 1413 case OMPC_mergeable: 1414 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 1415 } 1416 } 1417 1418 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 1419 bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst); 1420 OpenMPClauseKind Kind = OMPC_unknown; 1421 for (auto *C : S.clauses()) { 1422 // Find first clause (skip seq_cst clause, if it is first). 
1423 if (C->getClauseKind() != OMPC_seq_cst) { 1424 Kind = C->getClauseKind(); 1425 break; 1426 } 1427 } 1428 1429 const auto *CS = 1430 S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 1431 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) 1432 enterFullExpression(EWC); 1433 1434 LexicalScope Scope(*this, S.getSourceRange()); 1435 auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) { 1436 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(), 1437 S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart()); 1438 }; 1439 CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen); 1440 } 1441 1442 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) { 1443 llvm_unreachable("CodeGen for 'omp target' is not supported yet."); 1444 } 1445 1446 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) { 1447 llvm_unreachable("CodeGen for 'omp teams' is not supported yet."); 1448 } 1449 1450