1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit OpenMP nodes as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGOpenMPRuntime.h" 15 #include "CodeGenFunction.h" 16 #include "CodeGenModule.h" 17 #include "TargetInfo.h" 18 #include "clang/AST/Stmt.h" 19 #include "clang/AST/StmtOpenMP.h" 20 using namespace clang; 21 using namespace CodeGen; 22 23 //===----------------------------------------------------------------------===// 24 // OpenMP Directive Emission 25 //===----------------------------------------------------------------------===// 26 namespace { 27 /// \brief RAII for inlined OpenMP regions (like 'omp for', 'omp simd', 'omp 28 /// critical' etc.). Helps to generate proper debug info and provides correct 29 /// code generation for such constructs. 30 class InlinedOpenMPRegionScopeRAII { 31 InlinedOpenMPRegionRAII Region; 32 CodeGenFunction::LexicalScope DirectiveScope; 33 34 public: 35 InlinedOpenMPRegionScopeRAII(CodeGenFunction &CGF, 36 const OMPExecutableDirective &D) 37 : Region(CGF, D), DirectiveScope(CGF, D.getSourceRange()) {} 38 }; 39 } // namespace 40 41 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen 42 /// function. Here is the logic: 43 /// if (Cond) { 44 /// CodeGen(true); 45 /// } else { 46 /// CodeGen(false); 47 /// } 48 static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 49 const std::function<void(bool)> &CodeGen) { 50 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 51 52 // If the condition constant folds and can be elided, try to avoid emitting 53 // the condition and the dead arm of the if/else. 54 bool CondConstant; 55 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 56 CodeGen(CondConstant); 57 return; 58 } 59 60 // Otherwise, the condition did not fold, or we couldn't elide it. Just 61 // emit the conditional branch. 62 auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then"); 63 auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else"); 64 auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end"); 65 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0); 66 67 // Emit the 'then' code. 68 CGF.EmitBlock(ThenBlock); 69 CodeGen(/*ThenBlock*/ true); 70 CGF.EmitBranch(ContBlock); 71 // Emit the 'else' code if present. 72 { 73 // There is no need to emit line number for unconditional branch. 74 auto NL = ApplyDebugLocation::CreateEmpty(CGF); 75 CGF.EmitBlock(ElseBlock); 76 } 77 CodeGen(/*ThenBlock*/ false); 78 { 79 // There is no need to emit line number for unconditional branch. 80 auto NL = ApplyDebugLocation::CreateEmpty(CGF); 81 CGF.EmitBranch(ContBlock); 82 } 83 // Emit the continuation block for code after the if. 84 CGF.EmitBlock(ContBlock, /*IsFinished*/ true); 85 } 86 87 void CodeGenFunction::EmitOMPAggregateAssign(LValue OriginalAddr, 88 llvm::Value *PrivateAddr, 89 const Expr *AssignExpr, 90 QualType OriginalType, 91 const VarDecl *VDInit) { 92 EmitBlock(createBasicBlock(".omp.assign.begin.")); 93 if (!isa<CXXConstructExpr>(AssignExpr) || isTrivialInitializer(AssignExpr)) { 94 // Perform simple memcpy. 95 EmitAggregateAssign(PrivateAddr, OriginalAddr.getAddress(), 96 AssignExpr->getType()); 97 } else { 98 // Perform element-by-element initialization. 99 QualType ElementTy; 100 auto SrcBegin = OriginalAddr.getAddress(); 101 auto DestBegin = PrivateAddr; 102 auto ArrayTy = OriginalType->getAsArrayTypeUnsafe(); 103 auto SrcNumElements = emitArrayLength(ArrayTy, ElementTy, SrcBegin); 104 auto DestNumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin); 105 auto SrcEnd = Builder.CreateGEP(SrcBegin, SrcNumElements); 106 auto DestEnd = Builder.CreateGEP(DestBegin, DestNumElements); 107 // The basic structure here is a do-while loop, because we don't 108 // need to check for the zero-element case. 109 auto BodyBB = createBasicBlock("omp.arraycpy.body"); 110 auto DoneBB = createBasicBlock("omp.arraycpy.done"); 111 auto IsEmpty = 112 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); 113 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 114 115 // Enter the loop body, making that address the current address. 116 auto EntryBB = Builder.GetInsertBlock(); 117 EmitBlock(BodyBB); 118 auto SrcElementPast = Builder.CreatePHI(SrcBegin->getType(), 2, 119 "omp.arraycpy.srcElementPast"); 120 SrcElementPast->addIncoming(SrcEnd, EntryBB); 121 auto DestElementPast = Builder.CreatePHI(DestBegin->getType(), 2, 122 "omp.arraycpy.destElementPast"); 123 DestElementPast->addIncoming(DestEnd, EntryBB); 124 125 // Shift the address back by one element. 126 auto NegativeOne = llvm::ConstantInt::get(SizeTy, -1, true); 127 auto DestElement = Builder.CreateGEP(DestElementPast, NegativeOne, 128 "omp.arraycpy.dest.element"); 129 auto SrcElement = Builder.CreateGEP(SrcElementPast, NegativeOne, 130 "omp.arraycpy.src.element"); 131 { 132 // Create RunCleanScope to cleanup possible temps. 133 CodeGenFunction::RunCleanupsScope Init(*this); 134 // Emit initialization for single element. 135 LocalDeclMap[VDInit] = SrcElement; 136 EmitAnyExprToMem(AssignExpr, DestElement, 137 AssignExpr->getType().getQualifiers(), 138 /*IsInitializer*/ false); 139 LocalDeclMap.erase(VDInit); 140 } 141 142 // Check whether we've reached the end. 143 auto Done = 144 Builder.CreateICmpEQ(DestElement, DestBegin, "omp.arraycpy.done"); 145 Builder.CreateCondBr(Done, DoneBB, BodyBB); 146 DestElementPast->addIncoming(DestElement, Builder.GetInsertBlock()); 147 SrcElementPast->addIncoming(SrcElement, Builder.GetInsertBlock()); 148 149 // Done. 150 EmitBlock(DoneBB, true); 151 } 152 EmitBlock(createBasicBlock(".omp.assign.end.")); 153 } 154 155 void CodeGenFunction::EmitOMPFirstprivateClause( 156 const OMPExecutableDirective &D, 157 CodeGenFunction::OMPPrivateScope &PrivateScope) { 158 auto PrivateFilter = [](const OMPClause *C) -> bool { 159 return C->getClauseKind() == OMPC_firstprivate; 160 }; 161 for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)> 162 I(D.clauses(), PrivateFilter); I; ++I) { 163 auto *C = cast<OMPFirstprivateClause>(*I); 164 auto IRef = C->varlist_begin(); 165 auto InitsRef = C->inits().begin(); 166 for (auto IInit : C->private_copies()) { 167 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 168 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 169 bool IsRegistered; 170 if (*InitsRef != nullptr) { 171 // Emit VarDecl with copy init for arrays. 172 auto *FD = CapturedStmtInfo->lookup(OrigVD); 173 LValue Base = MakeNaturalAlignAddrLValue( 174 CapturedStmtInfo->getContextValue(), 175 getContext().getTagDeclType(FD->getParent())); 176 auto OriginalAddr = EmitLValueForField(Base, FD); 177 auto VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); 178 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * { 179 auto Emission = EmitAutoVarAlloca(*VD); 180 // Emit initialization of aggregate firstprivate vars. 181 EmitOMPAggregateAssign(OriginalAddr, Emission.getAllocatedAddress(), 182 VD->getInit(), (*IRef)->getType(), VDInit); 183 EmitAutoVarCleanups(Emission); 184 return Emission.getAllocatedAddress(); 185 }); 186 } else 187 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * { 188 // Emit private VarDecl with copy init. 189 EmitDecl(*VD); 190 return GetAddrOfLocalVar(VD); 191 }); 192 assert(IsRegistered && "firstprivate var already registered as private"); 193 // Silence the warning about unused variable. 194 (void)IsRegistered; 195 ++IRef, ++InitsRef; 196 } 197 } 198 } 199 200 void CodeGenFunction::EmitOMPPrivateClause( 201 const OMPExecutableDirective &D, 202 CodeGenFunction::OMPPrivateScope &PrivateScope) { 203 auto PrivateFilter = [](const OMPClause *C) -> bool { 204 return C->getClauseKind() == OMPC_private; 205 }; 206 for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)> 207 I(D.clauses(), PrivateFilter); I; ++I) { 208 auto *C = cast<OMPPrivateClause>(*I); 209 auto IRef = C->varlist_begin(); 210 for (auto IInit : C->private_copies()) { 211 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 212 auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 213 bool IsRegistered = 214 PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * { 215 // Emit private VarDecl with copy init. 216 EmitDecl(*VD); 217 return GetAddrOfLocalVar(VD); 218 }); 219 assert(IsRegistered && "private var already registered as private"); 220 // Silence the warning about unused variable. 221 (void)IsRegistered; 222 ++IRef; 223 } 224 } 225 } 226 227 /// \brief Emits code for OpenMP parallel directive in the parallel region. 228 static void EmitOMPParallelCall(CodeGenFunction &CGF, 229 const OMPParallelDirective &S, 230 llvm::Value *OutlinedFn, 231 llvm::Value *CapturedStruct) { 232 if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) { 233 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 234 auto NumThreadsClause = cast<OMPNumThreadsClause>(C); 235 auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 236 /*IgnoreResultAssign*/ true); 237 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( 238 CGF, NumThreads, NumThreadsClause->getLocStart()); 239 } 240 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, 241 CapturedStruct); 242 } 243 244 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { 245 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 246 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 247 auto OutlinedFn = CGM.getOpenMPRuntime().emitOutlinedFunction( 248 S, *CS->getCapturedDecl()->param_begin()); 249 if (auto C = S.getSingleClause(/*K*/ OMPC_if)) { 250 auto Cond = cast<OMPIfClause>(C)->getCondition(); 251 EmitOMPIfClause(*this, Cond, [&](bool ThenBlock) { 252 if (ThenBlock) 253 EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct); 254 else 255 CGM.getOpenMPRuntime().emitSerialCall(*this, S.getLocStart(), 256 OutlinedFn, CapturedStruct); 257 }); 258 } else 259 EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct); 260 } 261 262 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S, 263 bool SeparateIter) { 264 RunCleanupsScope BodyScope(*this); 265 // Update counters values on current iteration. 266 for (auto I : S.updates()) { 267 EmitIgnoredExpr(I); 268 } 269 // Update the linear variables. 270 for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) { 271 for (auto U : C->updates()) { 272 EmitIgnoredExpr(U); 273 } 274 } 275 276 // On a continue in the body, jump to the end. 277 auto Continue = getJumpDestInCurrentScope("omp.body.continue"); 278 BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue)); 279 // Emit loop body. 280 EmitStmt(S.getBody()); 281 // The end (updates/cleanups). 282 EmitBlock(Continue.getBlock()); 283 BreakContinueStack.pop_back(); 284 if (SeparateIter) { 285 // TODO: Update lastprivates if the SeparateIter flag is true. 286 // This will be implemented in a follow-up OMPLastprivateClause patch, but 287 // result should be still correct without it, as we do not make these 288 // variables private yet. 289 } 290 } 291 292 void CodeGenFunction::EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup, 293 const Expr *LoopCond, 294 const Expr *IncExpr, 295 const std::function<void()> &BodyGen) { 296 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); 297 auto Cnt = getPGORegionCounter(&S); 298 299 // Start the loop with a block that tests the condition. 300 auto CondBlock = createBasicBlock("omp.inner.for.cond"); 301 EmitBlock(CondBlock); 302 LoopStack.push(CondBlock); 303 304 // If there are any cleanups between here and the loop-exit scope, 305 // create a block to stage a loop exit along. 306 auto ExitBlock = LoopExit.getBlock(); 307 if (RequiresCleanup) 308 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); 309 310 auto LoopBody = createBasicBlock("omp.inner.for.body"); 311 312 // Emit condition. 313 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, Cnt.getCount()); 314 if (ExitBlock != LoopExit.getBlock()) { 315 EmitBlock(ExitBlock); 316 EmitBranchThroughCleanup(LoopExit); 317 } 318 319 EmitBlock(LoopBody); 320 Cnt.beginRegion(Builder); 321 322 // Create a block for the increment. 323 auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); 324 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 325 326 BodyGen(); 327 328 // Emit "IV = IV + 1" and a back-edge to the condition block. 329 EmitBlock(Continue.getBlock()); 330 EmitIgnoredExpr(IncExpr); 331 BreakContinueStack.pop_back(); 332 EmitBranch(CondBlock); 333 LoopStack.pop(); 334 // Emit the fall-through block. 335 EmitBlock(LoopExit.getBlock()); 336 } 337 338 void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) { 339 auto IC = S.counters().begin(); 340 for (auto F : S.finals()) { 341 if (LocalDeclMap.lookup(cast<DeclRefExpr>((*IC))->getDecl())) { 342 EmitIgnoredExpr(F); 343 } 344 ++IC; 345 } 346 // Emit the final values of the linear variables. 347 for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) { 348 for (auto F : C->finals()) { 349 EmitIgnoredExpr(F); 350 } 351 } 352 } 353 354 static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM, 355 const OMPAlignedClause &Clause) { 356 unsigned ClauseAlignment = 0; 357 if (auto AlignmentExpr = Clause.getAlignment()) { 358 auto AlignmentCI = 359 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); 360 ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue()); 361 } 362 for (auto E : Clause.varlists()) { 363 unsigned Alignment = ClauseAlignment; 364 if (Alignment == 0) { 365 // OpenMP [2.8.1, Description] 366 // If no optional parameter is specified, implementation-defined default 367 // alignments for SIMD instructions on the target platforms are assumed. 368 Alignment = CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment( 369 E->getType()); 370 } 371 assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) && 372 "alignment is not power of 2"); 373 if (Alignment != 0) { 374 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 375 CGF.EmitAlignmentAssumption(PtrValue, Alignment); 376 } 377 } 378 } 379 380 static void EmitPrivateLoopCounters(CodeGenFunction &CGF, 381 CodeGenFunction::OMPPrivateScope &LoopScope, 382 ArrayRef<Expr *> Counters) { 383 for (auto *E : Counters) { 384 auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 385 bool IsRegistered = LoopScope.addPrivate(VD, [&]() -> llvm::Value * { 386 // Emit var without initialization. 387 auto VarEmission = CGF.EmitAutoVarAlloca(*VD); 388 CGF.EmitAutoVarCleanups(VarEmission); 389 return VarEmission.getAllocatedAddress(); 390 }); 391 assert(IsRegistered && "counter already registered as private"); 392 // Silence the warning about unused variable. 393 (void)IsRegistered; 394 } 395 } 396 397 static void 398 EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D, 399 CodeGenFunction::OMPPrivateScope &PrivateScope) { 400 for (auto Clause : OMPExecutableDirective::linear_filter(D.clauses())) { 401 for (auto *E : Clause->varlists()) { 402 auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 403 bool IsRegistered = PrivateScope.addPrivate(VD, [&]()->llvm::Value * { 404 // Emit var without initialization. 405 auto VarEmission = CGF.EmitAutoVarAlloca(*VD); 406 CGF.EmitAutoVarCleanups(VarEmission); 407 return VarEmission.getAllocatedAddress(); 408 }); 409 assert(IsRegistered && "linear var already registered as private"); 410 // Silence the warning about unused variable. 411 (void)IsRegistered; 412 } 413 } 414 } 415 416 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 417 // Pragma 'simd' code depends on presence of 'lastprivate'. 418 // If present, we have to separate last iteration of the loop: 419 // 420 // if (LastIteration != 0) { 421 // for (IV in 0..LastIteration-1) BODY; 422 // BODY with updates of lastprivate vars; 423 // <Final counter/linear vars updates>; 424 // } 425 // 426 // otherwise (when there's no lastprivate): 427 // 428 // for (IV in 0..LastIteration) BODY; 429 // <Final counter/linear vars updates>; 430 // 431 432 // Walk clauses and process safelen/lastprivate. 433 bool SeparateIter = false; 434 LoopStack.setParallel(); 435 LoopStack.setVectorizerEnable(true); 436 for (auto C : S.clauses()) { 437 switch (C->getClauseKind()) { 438 case OMPC_safelen: { 439 RValue Len = EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(), 440 AggValueSlot::ignored(), true); 441 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 442 LoopStack.setVectorizerWidth(Val->getZExtValue()); 443 // In presence of finite 'safelen', it may be unsafe to mark all 444 // the memory instructions parallel, because loop-carried 445 // dependences of 'safelen' iterations are possible. 446 LoopStack.setParallel(false); 447 break; 448 } 449 case OMPC_aligned: 450 EmitOMPAlignedClause(*this, CGM, cast<OMPAlignedClause>(*C)); 451 break; 452 case OMPC_lastprivate: 453 SeparateIter = true; 454 break; 455 default: 456 // Not handled yet 457 ; 458 } 459 } 460 461 InlinedOpenMPRegionScopeRAII Region(*this, S); 462 463 // Emit inits for the linear variables. 464 for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) { 465 for (auto Init : C->inits()) { 466 auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); 467 EmitVarDecl(*D); 468 } 469 } 470 471 // Emit the loop iteration variable. 472 const Expr *IVExpr = S.getIterationVariable(); 473 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 474 EmitVarDecl(*IVDecl); 475 EmitIgnoredExpr(S.getInit()); 476 477 // Emit the iterations count variable. 478 // If it is not a variable, Sema decided to calculate iterations count on each 479 // iteration (e.g., it is foldable into a constant). 480 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 481 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 482 // Emit calculation of the iterations count. 483 EmitIgnoredExpr(S.getCalcLastIteration()); 484 } 485 486 // Emit the linear steps for the linear clauses. 487 // If a step is not constant, it is pre-calculated before the loop. 488 for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) { 489 if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep())) 490 if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) { 491 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); 492 // Emit calculation of the linear step. 493 EmitIgnoredExpr(CS); 494 } 495 } 496 497 if (SeparateIter) { 498 // Emit: if (LastIteration > 0) - begin. 499 RegionCounter Cnt = getPGORegionCounter(&S); 500 auto ThenBlock = createBasicBlock("simd.if.then"); 501 auto ContBlock = createBasicBlock("simd.if.end"); 502 EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount()); 503 EmitBlock(ThenBlock); 504 Cnt.beginRegion(Builder); 505 // Emit 'then' code. 506 { 507 OMPPrivateScope LoopScope(*this); 508 EmitPrivateLoopCounters(*this, LoopScope, S.counters()); 509 EmitPrivateLinearVars(*this, S, LoopScope); 510 EmitOMPPrivateClause(S, LoopScope); 511 (void)LoopScope.Privatize(); 512 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), 513 S.getCond(/*SeparateIter=*/true), S.getInc(), 514 [&S, this]() { 515 EmitOMPLoopBody(S); 516 EmitStopPoint(&S); 517 }); 518 EmitOMPLoopBody(S, /* SeparateIter */ true); 519 } 520 EmitOMPSimdFinal(S); 521 // Emit: if (LastIteration != 0) - end. 522 EmitBranch(ContBlock); 523 EmitBlock(ContBlock, true); 524 } else { 525 { 526 OMPPrivateScope LoopScope(*this); 527 EmitPrivateLoopCounters(*this, LoopScope, S.counters()); 528 EmitPrivateLinearVars(*this, S, LoopScope); 529 EmitOMPPrivateClause(S, LoopScope); 530 (void)LoopScope.Privatize(); 531 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), 532 S.getCond(/*SeparateIter=*/false), S.getInc(), 533 [&S, this]() { 534 EmitOMPLoopBody(S); 535 EmitStopPoint(&S); 536 }); 537 } 538 EmitOMPSimdFinal(S); 539 } 540 } 541 542 void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, 543 const OMPLoopDirective &S, 544 OMPPrivateScope &LoopScope, 545 llvm::Value *LB, llvm::Value *UB, 546 llvm::Value *ST, llvm::Value *IL, 547 llvm::Value *Chunk) { 548 auto &RT = CGM.getOpenMPRuntime(); 549 550 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 551 const bool Dynamic = RT.isDynamic(ScheduleKind); 552 553 assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) && 554 "static non-chunked schedule does not need outer loop"); 555 556 // Emit outer loop. 557 // 558 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 559 // When schedule(dynamic,chunk_size) is specified, the iterations are 560 // distributed to threads in the team in chunks as the threads request them. 561 // Each thread executes a chunk of iterations, then requests another chunk, 562 // until no chunks remain to be distributed. Each chunk contains chunk_size 563 // iterations, except for the last chunk to be distributed, which may have 564 // fewer iterations. When no chunk_size is specified, it defaults to 1. 565 // 566 // When schedule(guided,chunk_size) is specified, the iterations are assigned 567 // to threads in the team in chunks as the executing threads request them. 568 // Each thread executes a chunk of iterations, then requests another chunk, 569 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 570 // each chunk is proportional to the number of unassigned iterations divided 571 // by the number of threads in the team, decreasing to 1. For a chunk_size 572 // with value k (greater than 1), the size of each chunk is determined in the 573 // same way, with the restriction that the chunks do not contain fewer than k 574 // iterations (except for the last chunk to be assigned, which may have fewer 575 // than k iterations). 576 // 577 // When schedule(auto) is specified, the decision regarding scheduling is 578 // delegated to the compiler and/or runtime system. The programmer gives the 579 // implementation the freedom to choose any possible mapping of iterations to 580 // threads in the team. 581 // 582 // When schedule(runtime) is specified, the decision regarding scheduling is 583 // deferred until run time, and the schedule and chunk size are taken from the 584 // run-sched-var ICV. If the ICV is set to auto, the schedule is 585 // implementation defined 586 // 587 // while(__kmpc_dispatch_next(&LB, &UB)) { 588 // idx = LB; 589 // while (idx <= UB) { BODY; ++idx; } // inner loop 590 // } 591 // 592 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 593 // When schedule(static, chunk_size) is specified, iterations are divided into 594 // chunks of size chunk_size, and the chunks are assigned to the threads in 595 // the team in a round-robin fashion in the order of the thread number. 596 // 597 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { 598 // while (idx <= UB) { BODY; ++idx; } // inner loop 599 // LB = LB + ST; 600 // UB = UB + ST; 601 // } 602 // 603 604 const Expr *IVExpr = S.getIterationVariable(); 605 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 606 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 607 608 RT.emitForInit( 609 *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB, 610 (Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST, 611 Chunk); 612 613 auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 614 615 // Start the loop with a block that tests the condition. 616 auto CondBlock = createBasicBlock("omp.dispatch.cond"); 617 EmitBlock(CondBlock); 618 LoopStack.push(CondBlock); 619 620 llvm::Value *BoolCondVal = nullptr; 621 if (!Dynamic) { 622 // UB = min(UB, GlobalUB) 623 EmitIgnoredExpr(S.getEnsureUpperBound()); 624 // IV = LB 625 EmitIgnoredExpr(S.getInit()); 626 // IV < UB 627 BoolCondVal = EvaluateExprAsBool(S.getCond(false)); 628 } else { 629 BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, 630 IL, LB, UB, ST); 631 } 632 633 // If there are any cleanups between here and the loop-exit scope, 634 // create a block to stage a loop exit along. 635 auto ExitBlock = LoopExit.getBlock(); 636 if (LoopScope.requiresCleanups()) 637 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 638 639 auto LoopBody = createBasicBlock("omp.dispatch.body"); 640 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 641 if (ExitBlock != LoopExit.getBlock()) { 642 EmitBlock(ExitBlock); 643 EmitBranchThroughCleanup(LoopExit); 644 } 645 EmitBlock(LoopBody); 646 647 // Emit "IV = LB" (in case of static schedule, we have already calculated new 648 // LB for loop condition and emitted it above). 649 if (Dynamic) 650 EmitIgnoredExpr(S.getInit()); 651 652 // Create a block for the increment. 653 auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 654 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 655 656 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), 657 S.getCond(/*SeparateIter=*/false), S.getInc(), [&S, this]() { 658 EmitOMPLoopBody(S); 659 EmitStopPoint(&S); 660 }); 661 662 EmitBlock(Continue.getBlock()); 663 BreakContinueStack.pop_back(); 664 if (!Dynamic) { 665 // Emit "LB = LB + Stride", "UB = UB + Stride". 666 EmitIgnoredExpr(S.getNextLowerBound()); 667 EmitIgnoredExpr(S.getNextUpperBound()); 668 } 669 670 EmitBranch(CondBlock); 671 LoopStack.pop(); 672 // Emit the fall-through block. 673 EmitBlock(LoopExit.getBlock()); 674 675 // Tell the runtime we are done. 676 // FIXME: Also call fini for ordered loops with dynamic scheduling. 677 if (!Dynamic) 678 RT.emitForFinish(*this, S.getLocStart(), ScheduleKind); 679 } 680 681 /// \brief Emit a helper variable and return corresponding lvalue. 682 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 683 const DeclRefExpr *Helper) { 684 auto VDecl = cast<VarDecl>(Helper->getDecl()); 685 CGF.EmitVarDecl(*VDecl); 686 return CGF.EmitLValue(Helper); 687 } 688 689 void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { 690 // Emit the loop iteration variable. 691 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 692 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 693 EmitVarDecl(*IVDecl); 694 695 // Emit the iterations count variable. 696 // If it is not a variable, Sema decided to calculate iterations count on each 697 // iteration (e.g., it is foldable into a constant). 698 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 699 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 700 // Emit calculation of the iterations count. 701 EmitIgnoredExpr(S.getCalcLastIteration()); 702 } 703 704 auto &RT = CGM.getOpenMPRuntime(); 705 706 // Check pre-condition. 707 { 708 // Skip the entire loop if we don't meet the precondition. 709 RegionCounter Cnt = getPGORegionCounter(&S); 710 auto ThenBlock = createBasicBlock("omp.precond.then"); 711 auto ContBlock = createBasicBlock("omp.precond.end"); 712 EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount()); 713 EmitBlock(ThenBlock); 714 Cnt.beginRegion(Builder); 715 // Emit 'then' code. 716 { 717 // Emit helper vars inits. 718 LValue LB = 719 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); 720 LValue UB = 721 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); 722 LValue ST = 723 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 724 LValue IL = 725 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 726 727 OMPPrivateScope LoopScope(*this); 728 EmitPrivateLoopCounters(*this, LoopScope, S.counters()); 729 (void)LoopScope.Privatize(); 730 731 // Detect the loop schedule kind and chunk. 732 auto ScheduleKind = OMPC_SCHEDULE_unknown; 733 llvm::Value *Chunk = nullptr; 734 if (auto C = cast_or_null<OMPScheduleClause>( 735 S.getSingleClause(OMPC_schedule))) { 736 ScheduleKind = C->getScheduleKind(); 737 if (auto Ch = C->getChunkSize()) { 738 Chunk = EmitScalarExpr(Ch); 739 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 740 S.getIterationVariable()->getType()); 741 } 742 } 743 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 744 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 745 if (RT.isStaticNonchunked(ScheduleKind, 746 /* Chunked */ Chunk != nullptr)) { 747 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 748 // When no chunk_size is specified, the iteration space is divided into 749 // chunks that are approximately equal in size, and at most one chunk is 750 // distributed to each thread. Note that the size of the chunks is 751 // unspecified in this case. 752 RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, 753 IL.getAddress(), LB.getAddress(), UB.getAddress(), 754 ST.getAddress()); 755 // UB = min(UB, GlobalUB); 756 EmitIgnoredExpr(S.getEnsureUpperBound()); 757 // IV = LB; 758 EmitIgnoredExpr(S.getInit()); 759 // while (idx <= UB) { BODY; ++idx; } 760 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), 761 S.getCond(/*SeparateIter=*/false), S.getInc(), 762 [&S, this]() { 763 EmitOMPLoopBody(S); 764 EmitStopPoint(&S); 765 }); 766 // Tell the runtime we are done. 767 RT.emitForFinish(*this, S.getLocStart(), ScheduleKind); 768 } else { 769 // Emit the outer loop, which requests its work chunk [LB..UB] from 770 // runtime and runs the inner loop to process it. 771 EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(), 772 UB.getAddress(), ST.getAddress(), IL.getAddress(), 773 Chunk); 774 } 775 } 776 // We're now done with the loop, so jump to the continuation block. 777 EmitBranch(ContBlock); 778 EmitBlock(ContBlock, true); 779 } 780 } 781 782 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 783 InlinedOpenMPRegionScopeRAII Region(*this, S); 784 785 EmitOMPWorksharingLoop(S); 786 787 // Emit an implicit barrier at the end. 788 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 789 /*IsExplicit*/ false); 790 } 791 792 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) { 793 llvm_unreachable("CodeGen for 'omp for simd' is not supported yet."); 794 } 795 796 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 797 const Twine &Name, 798 llvm::Value *Init = nullptr) { 799 auto LVal = CGF.MakeNaturalAlignAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 800 if (Init) 801 CGF.EmitScalarInit(Init, LVal); 802 return LVal; 803 } 804 805 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 806 InlinedOpenMPRegionScopeRAII Region(*this, S); 807 808 auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); 809 auto *CS = dyn_cast<CompoundStmt>(Stmt); 810 if (CS && CS->size() > 1) { 811 auto &C = CGM.getContext(); 812 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 813 // Emit helper vars inits. 814 LValue LB = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.lb.", 815 Builder.getInt32(0)); 816 auto *GlobalUBVal = Builder.getInt32(CS->size() - 1); 817 LValue UB = 818 createSectionLVal(*this, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 819 LValue ST = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.st.", 820 Builder.getInt32(1)); 821 LValue IL = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.il.", 822 Builder.getInt32(0)); 823 // Loop counter. 824 LValue IV = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.iv."); 825 OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 826 OpaqueValueMapping OpaqueIV(*this, &IVRefExpr, IV); 827 OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 828 OpaqueValueMapping OpaqueUB(*this, &UBRefExpr, UB); 829 // Generate condition for loop. 830 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 831 OK_Ordinary, S.getLocStart(), /*fpContractable=*/false); 832 // Increment for loop counter. 833 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 834 S.getLocStart()); 835 auto BodyGen = [this, CS, &S, &IV]() { 836 // Iterate through all sections and emit a switch construct: 837 // switch (IV) { 838 // case 0: 839 // <SectionStmt[0]>; 840 // break; 841 // ... 842 // case <NumSection> - 1: 843 // <SectionStmt[<NumSection> - 1]>; 844 // break; 845 // } 846 // .omp.sections.exit: 847 auto *ExitBB = createBasicBlock(".omp.sections.exit"); 848 auto *SwitchStmt = Builder.CreateSwitch( 849 EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, 850 CS->size()); 851 unsigned CaseNumber = 0; 852 for (auto C = CS->children(); C; ++C, ++CaseNumber) { 853 auto CaseBB = createBasicBlock(".omp.sections.case"); 854 EmitBlock(CaseBB); 855 SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB); 856 EmitStmt(*C); 857 EmitBranch(ExitBB); 858 } 859 EmitBlock(ExitBB, /*IsFinished=*/true); 860 }; 861 // Emit static non-chunked loop. 862 CGM.getOpenMPRuntime().emitForInit( 863 *this, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32, 864 /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(), 865 ST.getAddress()); 866 // UB = min(UB, GlobalUB); 867 auto *UBVal = EmitLoadOfScalar(UB, S.getLocStart()); 868 auto *MinUBGlobalUB = Builder.CreateSelect( 869 Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 870 EmitStoreOfScalar(MinUBGlobalUB, UB); 871 // IV = LB; 872 EmitStoreOfScalar(EmitLoadOfScalar(LB, S.getLocStart()), IV); 873 // while (idx <= UB) { BODY; ++idx; } 874 EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen); 875 // Tell the runtime we are done. 876 CGM.getOpenMPRuntime().emitForFinish(*this, S.getLocStart(), 877 OMPC_SCHEDULE_static); 878 } else { 879 // If only one section is found - no need to generate loop, emit as a single 880 // region. 881 CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void { 882 InlinedOpenMPRegionScopeRAII Region(*this, S); 883 EmitStmt(Stmt); 884 EnsureInsertPoint(); 885 }, S.getLocStart(), llvm::None, llvm::None, llvm::None, llvm::None); 886 } 887 888 // Emit an implicit barrier at the end. 889 if (!S.getSingleClause(OMPC_nowait)) 890 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 891 /*IsExplicit=*/false); 892 } 893 894 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 895 InlinedOpenMPRegionScopeRAII Region(*this, S); 896 EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 897 EnsureInsertPoint(); 898 } 899 900 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 901 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 902 llvm::SmallVector<const Expr *, 8> SrcExprs; 903 llvm::SmallVector<const Expr *, 8> DstExprs; 904 llvm::SmallVector<const Expr *, 8> AssignmentOps; 905 // Check if there are any 'copyprivate' clauses associated with this 'single' 906 // construct. 907 auto CopyprivateFilter = [](const OMPClause *C) -> bool { 908 return C->getClauseKind() == OMPC_copyprivate; 909 }; 910 // Build a list of copyprivate variables along with helper expressions 911 // (<source>, <destination>, <destination>=<source> expressions) 912 typedef OMPExecutableDirective::filtered_clause_iterator<decltype( 913 CopyprivateFilter)> CopyprivateIter; 914 for (CopyprivateIter I(S.clauses(), CopyprivateFilter); I; ++I) { 915 auto *C = cast<OMPCopyprivateClause>(*I); 916 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 917 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 918 DstExprs.append(C->destination_exprs().begin(), 919 C->destination_exprs().end()); 920 AssignmentOps.append(C->assignment_ops().begin(), 921 C->assignment_ops().end()); 922 } 923 // Emit code for 'single' region along with 'copyprivate' clauses 924 CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void { 925 InlinedOpenMPRegionScopeRAII Region(*this, S); 926 EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 927 EnsureInsertPoint(); 928 }, S.getLocStart(), CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); 929 // Emit an implicit barrier at the end. 930 if (!S.getSingleClause(OMPC_nowait)) 931 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 932 /*IsExplicit=*/false); 933 } 934 935 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 936 CGM.getOpenMPRuntime().emitMasterRegion(*this, [&]() -> void { 937 InlinedOpenMPRegionScopeRAII Region(*this, S); 938 EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 939 EnsureInsertPoint(); 940 }, S.getLocStart()); 941 } 942 943 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 944 CGM.getOpenMPRuntime().emitCriticalRegion( 945 *this, S.getDirectiveName().getAsString(), [&]() -> void { 946 InlinedOpenMPRegionScopeRAII Region(*this, S); 947 EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 948 EnsureInsertPoint(); 949 }, S.getLocStart()); 950 } 951 952 void 953 CodeGenFunction::EmitOMPParallelForDirective(const OMPParallelForDirective &) { 954 llvm_unreachable("CodeGen for 'omp parallel for' is not supported yet."); 955 } 956 957 void CodeGenFunction::EmitOMPParallelForSimdDirective( 958 const OMPParallelForSimdDirective &) { 959 llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet."); 960 } 961 962 void CodeGenFunction::EmitOMPParallelSectionsDirective( 963 const OMPParallelSectionsDirective &) { 964 llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet."); 965 } 966 967 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 968 // Emit outlined function for task construct. 969 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 970 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 971 auto *I = CS->getCapturedDecl()->param_begin(); 972 // The first function argument for tasks is a thread id, the second one is a 973 // part id (0 for tied tasks, >=0 for untied task). 974 auto OutlinedFn = 975 CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, *std::next(I)); 976 // Check if we should emit tied or untied task. 977 bool Tied = !S.getSingleClause(OMPC_untied); 978 // Check if the task is final 979 llvm::PointerIntPair<llvm::Value *, 1, bool> Final; 980 if (auto *Clause = S.getSingleClause(OMPC_final)) { 981 // If the condition constant folds and can be elided, try to avoid emitting 982 // the condition and the dead arm of the if/else. 983 auto *Cond = cast<OMPFinalClause>(Clause)->getCondition(); 984 bool CondConstant; 985 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 986 Final.setInt(CondConstant); 987 else 988 Final.setPointer(EvaluateExprAsBool(Cond)); 989 } else { 990 // By default the task is not final. 991 Final.setInt(/*IntVal=*/false); 992 } 993 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 994 CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final, 995 OutlinedFn, SharedsTy, CapturedStruct); 996 } 997 998 void CodeGenFunction::EmitOMPTaskyieldDirective( 999 const OMPTaskyieldDirective &S) { 1000 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); 1001 } 1002 1003 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 1004 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart()); 1005 } 1006 1007 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) { 1008 llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet."); 1009 } 1010 1011 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 1012 CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { 1013 if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) { 1014 auto FlushClause = cast<OMPFlushClause>(C); 1015 return llvm::makeArrayRef(FlushClause->varlist_begin(), 1016 FlushClause->varlist_end()); 1017 } 1018 return llvm::None; 1019 }(), S.getLocStart()); 1020 } 1021 1022 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) { 1023 llvm_unreachable("CodeGen for 'omp ordered' is not supported yet."); 1024 } 1025 1026 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 1027 QualType SrcType, QualType DestType) { 1028 assert(CGF.hasScalarEvaluationKind(DestType) && 1029 "DestType must have scalar evaluation kind."); 1030 assert(!Val.isAggregate() && "Must be a scalar or complex."); 1031 return Val.isScalar() 1032 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType) 1033 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 1034 DestType); 1035 } 1036 1037 static CodeGenFunction::ComplexPairTy 1038 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 1039 QualType DestType) { 1040 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 1041 "DestType must have complex evaluation kind."); 1042 CodeGenFunction::ComplexPairTy ComplexVal; 1043 if (Val.isScalar()) { 1044 // Convert the input element to the element type of the complex. 1045 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 1046 auto ScalarVal = 1047 CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType); 1048 ComplexVal = CodeGenFunction::ComplexPairTy( 1049 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 1050 } else { 1051 assert(Val.isComplex() && "Must be a scalar or complex."); 1052 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 1053 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 1054 ComplexVal.first = CGF.EmitScalarConversion( 1055 Val.getComplexVal().first, SrcElementType, DestElementType); 1056 ComplexVal.second = CGF.EmitScalarConversion( 1057 Val.getComplexVal().second, SrcElementType, DestElementType); 1058 } 1059 return ComplexVal; 1060 } 1061 1062 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 1063 const Expr *X, const Expr *V, 1064 SourceLocation Loc) { 1065 // v = x; 1066 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 1067 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 1068 LValue XLValue = CGF.EmitLValue(X); 1069 LValue VLValue = CGF.EmitLValue(V); 1070 RValue Res = XLValue.isGlobalReg() 1071 ? CGF.EmitLoadOfLValue(XLValue, Loc) 1072 : CGF.EmitAtomicLoad(XLValue, Loc, 1073 IsSeqCst ? llvm::SequentiallyConsistent 1074 : llvm::Monotonic, 1075 XLValue.isVolatile()); 1076 // OpenMP, 2.12.6, atomic Construct 1077 // Any atomic construct with a seq_cst clause forces the atomically 1078 // performed operation to include an implicit flush operation without a 1079 // list. 1080 if (IsSeqCst) 1081 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 1082 switch (CGF.getEvaluationKind(V->getType())) { 1083 case TEK_Scalar: 1084 CGF.EmitStoreOfScalar( 1085 convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue); 1086 break; 1087 case TEK_Complex: 1088 CGF.EmitStoreOfComplex( 1089 convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue, 1090 /*isInit=*/false); 1091 break; 1092 case TEK_Aggregate: 1093 llvm_unreachable("Must be a scalar or complex."); 1094 } 1095 } 1096 1097 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 1098 const Expr *X, const Expr *E, 1099 SourceLocation Loc) { 1100 // x = expr; 1101 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 1102 LValue XLValue = CGF.EmitLValue(X); 1103 RValue ExprRValue = CGF.EmitAnyExpr(E); 1104 if (XLValue.isGlobalReg()) 1105 CGF.EmitStoreThroughGlobalRegLValue(ExprRValue, XLValue); 1106 else 1107 CGF.EmitAtomicStore(ExprRValue, XLValue, 1108 IsSeqCst ? llvm::SequentiallyConsistent 1109 : llvm::Monotonic, 1110 XLValue.isVolatile(), /*IsInit=*/false); 1111 // OpenMP, 2.12.6, atomic Construct 1112 // Any atomic construct with a seq_cst clause forces the atomically 1113 // performed operation to include an implicit flush operation without a 1114 // list. 1115 if (IsSeqCst) 1116 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 1117 } 1118 1119 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 1120 bool IsSeqCst, const Expr *X, const Expr *V, 1121 const Expr *E, SourceLocation Loc) { 1122 switch (Kind) { 1123 case OMPC_read: 1124 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 1125 break; 1126 case OMPC_write: 1127 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 1128 break; 1129 case OMPC_update: 1130 case OMPC_capture: 1131 llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet."); 1132 case OMPC_if: 1133 case OMPC_final: 1134 case OMPC_num_threads: 1135 case OMPC_private: 1136 case OMPC_firstprivate: 1137 case OMPC_lastprivate: 1138 case OMPC_reduction: 1139 case OMPC_safelen: 1140 case OMPC_collapse: 1141 case OMPC_default: 1142 case OMPC_seq_cst: 1143 case OMPC_shared: 1144 case OMPC_linear: 1145 case OMPC_aligned: 1146 case OMPC_copyin: 1147 case OMPC_copyprivate: 1148 case OMPC_flush: 1149 case OMPC_proc_bind: 1150 case OMPC_schedule: 1151 case OMPC_ordered: 1152 case OMPC_nowait: 1153 case OMPC_untied: 1154 case OMPC_threadprivate: 1155 case OMPC_mergeable: 1156 case OMPC_unknown: 1157 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 1158 } 1159 } 1160 1161 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 1162 bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst); 1163 OpenMPClauseKind Kind = OMPC_unknown; 1164 for (auto *C : S.clauses()) { 1165 // Find first clause (skip seq_cst clause, if it is first). 1166 if (C->getClauseKind() != OMPC_seq_cst) { 1167 Kind = C->getClauseKind(); 1168 break; 1169 } 1170 } 1171 1172 const auto *CS = 1173 S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 1174 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) 1175 enterFullExpression(EWC); 1176 InlinedOpenMPRegionScopeRAII Region(*this, S); 1177 1178 EmitOMPAtomicExpr(*this, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(), 1179 S.getLocStart()); 1180 } 1181 1182 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) { 1183 llvm_unreachable("CodeGen for 'omp target' is not supported yet."); 1184 } 1185 1186 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) { 1187 llvm_unreachable("CodeGen for 'omp teams' is not supported yet."); 1188 } 1189 1190