//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
using namespace clang;
using namespace CodeGen;

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
namespace {
/// \brief RAII for inlined OpenMP regions (like 'omp for', 'omp simd', 'omp
/// critical' etc.). Helps to generate proper debug info and provides correct
/// code generation for such constructs.
class InlinedOpenMPRegionScopeRAII {
  InlinedOpenMPRegionRAII Region;
  CodeGenFunction::LexicalScope DirectiveScope;

public:
  InlinedOpenMPRegionScopeRAII(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D)
      : Region(CGF, D), DirectiveScope(CGF, D.getSourceRange()) {}
};
} // namespace

/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
///   CodeGen(true);
/// } else {
///   CodeGen(false);
/// }
static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                            const std::function<void(bool)> &CodeGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    CodeGen(CondConstant);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then");
  auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else");
  auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  CodeGen(/*ThenBlock*/ true);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  {
    // There is no need to emit a line number for an unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ElseBlock);
  }
  CodeGen(/*ThenBlock*/ false);
  {
    // There is no need to emit a line number for an unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBranch(ContBlock);
  }
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished*/ true);
}

void CodeGenFunction::EmitOMPAggregateAssign(LValue OriginalAddr,
                                             llvm::Value *PrivateAddr,
                                             const Expr *AssignExpr,
                                             QualType OriginalType,
                                             const VarDecl *VDInit) {
  EmitBlock(createBasicBlock(".omp.assign.begin."));
  if (!isa<CXXConstructExpr>(AssignExpr) || isTrivialInitializer(AssignExpr)) {
    // Perform simple memcpy.
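    // Either the initializer is not a constructor call, or it is a trivial
    // one, so there are no per-element side effects and a bulk aggregate
    // copy of the original storage is sufficient.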
    EmitAggregateAssign(PrivateAddr, OriginalAddr.getAddress(),
                        AssignExpr->getType());
  } else {
    // Perform element-by-element initialization.
    QualType ElementTy;
    auto SrcBegin = OriginalAddr.getAddress();
    auto DestBegin = PrivateAddr;
    auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
    auto SrcNumElements = emitArrayLength(ArrayTy, ElementTy, SrcBegin);
    auto DestNumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
    auto SrcEnd = Builder.CreateGEP(SrcBegin, SrcNumElements);
    auto DestEnd = Builder.CreateGEP(DestBegin, DestNumElements);
    // The basic structure here is a do-while loop, because we don't
    // need to check for the zero-element case.
    auto BodyBB = createBasicBlock("omp.arraycpy.body");
    auto DoneBB = createBasicBlock("omp.arraycpy.done");
    auto IsEmpty =
        Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
    Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

    // Enter the loop body, making that address the current address.
    auto EntryBB = Builder.GetInsertBlock();
    EmitBlock(BodyBB);
    auto SrcElementPast = Builder.CreatePHI(SrcBegin->getType(), 2,
                                            "omp.arraycpy.srcElementPast");
    SrcElementPast->addIncoming(SrcEnd, EntryBB);
    auto DestElementPast = Builder.CreatePHI(DestBegin->getType(), 2,
                                             "omp.arraycpy.destElementPast");
    DestElementPast->addIncoming(DestEnd, EntryBB);

    // Shift the address back by one element.
    auto NegativeOne = llvm::ConstantInt::get(SizeTy, -1, true);
    auto DestElement = Builder.CreateGEP(DestElementPast, NegativeOne,
                                         "omp.arraycpy.dest.element");
    auto SrcElement = Builder.CreateGEP(SrcElementPast, NegativeOne,
                                        "omp.arraycpy.src.element");
    {
      // Create a RunCleanupsScope to clean up possible temps.
      CodeGenFunction::RunCleanupsScope Init(*this);
      // Emit initialization for a single element.
      LocalDeclMap[VDInit] = SrcElement;
      EmitAnyExprToMem(AssignExpr, DestElement,
                       AssignExpr->getType().getQualifiers(),
                       /*IsInitializer*/ false);
      LocalDeclMap.erase(VDInit);
    }

    // Check whether we've reached the end.
    auto Done =
        Builder.CreateICmpEQ(DestElement, DestBegin, "omp.arraycpy.done");
    Builder.CreateCondBr(Done, DoneBB, BodyBB);
    DestElementPast->addIncoming(DestElement, Builder.GetInsertBlock());
    SrcElementPast->addIncoming(SrcElement, Builder.GetInsertBlock());

    // Done.
    EmitBlock(DoneBB, true);
  }
  EmitBlock(createBasicBlock(".omp.assign.end."));
}

void CodeGenFunction::EmitOMPFirstprivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_firstprivate;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter); I; ++I) {
    auto *C = cast<OMPFirstprivateClause>(*I);
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      bool IsRegistered;
      if (*InitsRef != nullptr) {
        // Emit VarDecl with copy init for arrays.
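        // The original variable is captured by the enclosing region, so its
        // storage must be addressed through the captured-struct field rather
        // than a local alloca.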
        auto *FD = CapturedStmtInfo->lookup(OrigVD);
        LValue Base = MakeNaturalAlignAddrLValue(
            CapturedStmtInfo->getContextValue(),
            getContext().getTagDeclType(FD->getParent()));
        auto OriginalAddr = EmitLValueForField(Base, FD);
        auto VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
          auto Emission = EmitAutoVarAlloca(*VD);
          // Emit initialization of aggregate firstprivate vars.
          EmitOMPAggregateAssign(OriginalAddr, Emission.getAllocatedAddress(),
                                 VD->getInit(), (*IRef)->getType(), VDInit);
          EmitAutoVarCleanups(Emission);
          return Emission.getAllocatedAddress();
        });
      } else
        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
      assert(IsRegistered && "firstprivate var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++IRef, ++InitsRef;
    }
  }
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_private;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter); I; ++I) {
    auto *C = cast<OMPPrivateClause>(*I);
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      bool IsRegistered =
          PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
      assert(IsRegistered && "private var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++IRef;
    }
  }
}

/// \brief Emits code for OpenMP parallel directive in the parallel region.
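/// If a 'num_threads' clause is present, its value is emitted under its own
/// cleanups scope and passed to the runtime before the parallel call, so the
/// forked team is sized accordingly.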
static void EmitOMPParallelCall(CodeGenFunction &CGF,
                                const OMPParallelDirective &S,
                                llvm::Value *OutlinedFn,
                                llvm::Value *CapturedStruct) {
  if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedStruct);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto OutlinedFn = CGM.getOpenMPRuntime().emitOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin());
  if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
    auto Cond = cast<OMPIfClause>(C)->getCondition();
    EmitOMPIfClause(*this, Cond, [&](bool ThenBlock) {
      if (ThenBlock)
        EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
      else
        CGM.getOpenMPRuntime().emitSerialCall(*this, S.getLocStart(),
                                              OutlinedFn, CapturedStruct);
    });
  } else
    EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
                                      bool SeparateIter) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values for the current iteration.
  for (auto I : S.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    for (auto U : C->updates()) {
      EmitIgnoredExpr(U);
    }
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue));
  // Emit loop body.
  EmitStmt(S.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (SeparateIter) {
    // TODO: Update lastprivates if the SeparateIter flag is true.
    // This will be implemented in a follow-up OMPLastprivateClause patch, but
    // the result should still be correct without it, as we do not make these
    // variables private yet.
  }
}

void CodeGenFunction::EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup,
                                       const Expr *LoopCond,
                                       const Expr *IncExpr,
                                       const std::function<void()> &BodyGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
  auto Cnt = getPGORegionCounter(&S);

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
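  // Branch either into the loop body or out of the loop; when the scope has
  // pending cleanups, the exit is staged through the cleanup block created
  // above.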
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, Cnt.getCount());
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  Cnt.beginRegion(Builder);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen();

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) {
  auto IC = S.counters().begin();
  for (auto F : S.finals()) {
    if (LocalDeclMap.lookup(cast<DeclRefExpr>((*IC))->getDecl())) {
      EmitIgnoredExpr(F);
    }
    ++IC;
  }
  // Emit the final values of the linear variables.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    for (auto F : C->finals()) {
      EmitIgnoredExpr(F);
    }
  }
}

static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM,
                                 const OMPAlignedClause &Clause) {
  unsigned ClauseAlignment = 0;
  if (auto AlignmentExpr = Clause.getAlignment()) {
    auto AlignmentCI =
        cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
    ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
  }
  for (auto E : Clause.varlists()) {
    unsigned Alignment = ClauseAlignment;
    if (Alignment == 0) {
      // OpenMP [2.8.1, Description]
      // If no optional parameter is specified, implementation-defined default
      // alignments for SIMD instructions on the target platforms are assumed.
      Alignment = CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
          E->getType());
    }
    assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
           "alignment is not power of 2");
    if (Alignment != 0) {
      llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
      CGF.EmitAlignmentAssumption(PtrValue, Alignment);
    }
  }
}

static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters) {
  for (auto *E : Counters) {
    auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    bool IsRegistered = LoopScope.addPrivate(VD, [&]() -> llvm::Value * {
      // Emit var without initialization.
      auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
      CGF.EmitAutoVarCleanups(VarEmission);
      return VarEmission.getAllocatedAddress();
    });
    assert(IsRegistered && "counter already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;
  }
}

static void
EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
                      CodeGenFunction::OMPPrivateScope &PrivateScope) {
  for (auto Clause : OMPExecutableDirective::linear_filter(D.clauses())) {
    for (auto *E : Clause->varlists()) {
      auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> llvm::Value * {
        // Emit var without initialization.
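        // Only storage is created here; the start value of each linear
        // variable is emitted separately, from the clause's init expressions,
        // before the loop is entered.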
        auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
        CGF.EmitAutoVarCleanups(VarEmission);
        return VarEmission.getAllocatedAddress();
      });
      assert(IsRegistered && "linear var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
    }
  }
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  // Code for the 'simd' pragma depends on the presence of a 'lastprivate'
  // clause. If present, we have to separate the last iteration of the loop:
  //
  // if (LastIteration != 0) {
  //   for (IV in 0..LastIteration-1) BODY;
  //   BODY with updates of lastprivate vars;
  //   <Final counter/linear vars updates>;
  // }
  //
  // otherwise (when there's no lastprivate):
  //
  // for (IV in 0..LastIteration) BODY;
  // <Final counter/linear vars updates>;
  //

  // Walk clauses and process safelen/lastprivate.
  bool SeparateIter = false;
  LoopStack.setParallel();
  LoopStack.setVectorizerEnable(true);
  for (auto C : S.clauses()) {
    switch (C->getClauseKind()) {
    case OMPC_safelen: {
      RValue Len = EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
                               AggValueSlot::ignored(), true);
      llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
      LoopStack.setVectorizerWidth(Val->getZExtValue());
      // In the presence of a finite 'safelen', it may be unsafe to mark all
      // the memory instructions parallel, because loop-carried
      // dependences of 'safelen' iterations are possible.
      LoopStack.setParallel(false);
      break;
    }
    case OMPC_aligned:
      EmitOMPAlignedClause(*this, CGM, cast<OMPAlignedClause>(*C));
      break;
    case OMPC_lastprivate:
      SeparateIter = true;
      break;
    default:
      // Not handled yet
      ;
    }
  }

  InlinedOpenMPRegionScopeRAII Region(*this, S);

  // Emit inits for the linear variables.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    for (auto Init : C->inits()) {
      auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      EmitVarDecl(*D);
    }
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  EmitVarDecl(*IVDecl);
  EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iteration count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  // Emit the linear steps for the linear clauses.
  // If a step is not constant, it is pre-calculated before the loop.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }

  if (SeparateIter) {
    // Emit: if (LastIteration > 0) - begin.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("simd.if.then");
    auto ContBlock = createBasicBlock("simd.if.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      EmitPrivateLinearVars(*this, S, LoopScope);
      EmitOMPPrivateClause(S, LoopScope);
      (void)LoopScope.Privatize();
      EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                       S.getCond(/*SeparateIter=*/true), S.getInc(),
                       [&S, this]() {
                         EmitOMPLoopBody(S);
                         EmitStopPoint(&S);
                       });
      EmitOMPLoopBody(S, /* SeparateIter */ true);
    }
    EmitOMPSimdFinal(S);
    // Emit: if (LastIteration != 0) - end.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  } else {
    {
      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      EmitPrivateLinearVars(*this, S, LoopScope);
      EmitOMPPrivateClause(S, LoopScope);
      (void)LoopScope.Privatize();
      EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                       S.getCond(/*SeparateIter=*/false), S.getInc(),
                       [&S, this]() {
                         EmitOMPLoopBody(S);
                         EmitStopPoint(&S);
                       });
    }
    EmitOMPSimdFinal(S);
  }
}

void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                          const OMPLoopDirective &S,
                                          OMPPrivateScope &LoopScope,
                                          llvm::Value *LB, llvm::Value *UB,
                                          llvm::Value *ST, llvm::Value *IL,
                                          llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool Dynamic = RT.isDynamic(ScheduleKind);

  assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the threads
  // in the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  RT.emitForInit(
      *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
      (Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST,
      Chunk);

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  if (!Dynamic) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond(false));
  } else {
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL,
                                 LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (Dynamic)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                   S.getCond(/*SeparateIter=*/false), S.getInc(), [&S, this]() {
                     EmitOMPLoopBody(S);
                     EmitStopPoint(&S);
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!Dynamic) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  // FIXME: Also call fini for ordered loops with dynamic scheduling.
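  // (For dynamic schedules the dispatch is finished implicitly once
  // emitForNext returns false, so only static schedules need an explicit
  // finish call here.)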
  if (!Dynamic)
    RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
}

/// \brief Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iteration count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Check pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("omp.precond.then");
    auto ContBlock = createBasicBlock("omp.precond.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      auto ScheduleKind = OMPC_SCHEDULE_unknown;
      llvm::Value *Chunk = nullptr;
      if (auto C = cast_or_null<OMPScheduleClause>(
              S.getSingleClause(OMPC_schedule))) {
        ScheduleKind = C->getScheduleKind();
        if (auto Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk is
        // distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
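        // The runtime can therefore compute each thread's bounds once, up
        // front, and no outer dispatch loop is required: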
        RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                       IL.getAddress(), LB.getAddress(), UB.getAddress(),
                       ST.getAddress());
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                         S.getCond(/*SeparateIter=*/false), S.getInc(),
                         [&S, this]() {
                           EmitOMPLoopBody(S);
                           EmitStopPoint(&S);
                         });
        // Tell the runtime we are done.
        RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // the runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
                            UB.getAddress(), ST.getAddress(), IL.getAddress(),
                            Chunk);
      }
    }
    // We're now done with the loop, so jump to the continuation block.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  }
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  InlinedOpenMPRegionScopeRAII Region(*this, S);

  EmitOMPWorksharingLoop(S);

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait)) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
}

static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeNaturalAlignAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitScalarInit(Init, LVal);
  return LVal;
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  InlinedOpenMPRegionScopeRAII Region(*this, S);

  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  if (CS && CS->size() > 1) {
    auto &C = CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.lb.",
                                  Builder.getInt32(0));
    auto *GlobalUBVal = Builder.getInt32(CS->size() - 1);
    LValue UB =
        createSectionLVal(*this, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.st.",
                                  Builder.getInt32(1));
    LValue IL = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.il.",
                                  Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.iv.");
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    OpaqueValueMapping OpaqueIV(*this, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    OpaqueValueMapping OpaqueUB(*this, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(), /*fpContractable=*/false);
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [this, CS, &S, &IV]() {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      // case 0:
      //   <SectionStmt[0]>;
      //   break;
      // ...
      // case <NumSection> - 1:
      //   <SectionStmt[<NumSection> - 1]>;
      //   break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt = Builder.CreateSwitch(
          EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS->size());
      unsigned CaseNumber = 0;
      for (auto C = CS->children(); C; ++C, ++CaseNumber) {
        auto CaseBB = createBasicBlock(".omp.sections.case");
        EmitBlock(CaseBB);
        SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
        EmitStmt(*C);
        EmitBranch(ExitBB);
      }
      EmitBlock(ExitBB, /*IsFinished=*/true);
    };
    // Emit static non-chunked loop.
    CGM.getOpenMPRuntime().emitForInit(
        *this, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
        /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
        ST.getAddress());
    // UB = min(UB, GlobalUB);
    auto *UBVal = EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = Builder.CreateSelect(
        Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    EmitStoreOfScalar(EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen);
    // Tell the runtime we are done.
    CGM.getOpenMPRuntime().emitForFinish(*this, S.getLocStart(),
                                         OMPC_SCHEDULE_static);
  } else {
    // If only one section is found, there is no need to generate a loop;
    // emit it as a single region.
    CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void {
      InlinedOpenMPRegionScopeRAII Region(*this, S);
      EmitStmt(Stmt);
      EnsureInsertPoint();
    }, S.getLocStart(), llvm::None, llvm::None, llvm::None, llvm::None);
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait)) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getLocStart(),
        (CS && CS->size() > 1) ? OMPD_sections : OMPD_single);
  }
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  InlinedOpenMPRegionScopeRAII Region(*this, S);
  EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  EnsureInsertPoint();
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> DstExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this 'single'
  // construct.
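  // E.g. '#pragma omp single copyprivate(a, b)' broadcasts the executing
  // thread's values of 'a' and 'b' to all other threads in the team.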
  auto CopyprivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_copyprivate;
  };
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination> = <source> expressions).
  typedef OMPExecutableDirective::filtered_clause_iterator<decltype(
      CopyprivateFilter)> CopyprivateIter;
  for (CopyprivateIter I(S.clauses(), CopyprivateFilter); I; ++I) {
    auto *C = cast<OMPCopyprivateClause>(*I);
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    DstExprs.append(C->destination_exprs().begin(),
                    C->destination_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void {
    InlinedOpenMPRegionScopeRAII Region(*this, S);
    EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    EnsureInsertPoint();
  }, S.getLocStart(), CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait)) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_single);
  }
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  CGM.getOpenMPRuntime().emitMasterRegion(*this, [&]() -> void {
    InlinedOpenMPRegionScopeRAII Region(*this, S);
    EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    EnsureInsertPoint();
  }, S.getLocStart());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  CGM.getOpenMPRuntime().emitCriticalRegion(
      *this, S.getDirectiveName().getAsString(), [&]() -> void {
        InlinedOpenMPRegionScopeRAII Region(*this, S);
        EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
        EnsureInsertPoint();
      }, S.getLocStart());
}

void
CodeGenFunction::EmitOMPParallelForDirective(const OMPParallelForDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for' is not supported yet.");
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet.");
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet.");
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto *I = CS->getCapturedDecl()->param_begin();
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >= 0 for untied tasks).
  auto OutlinedFn =
      CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, *std::next(I));
  // Check if we should emit tied or untied task.
  bool Tied = !S.getSingleClause(OMPC_untied);
  // Check if the task is final.
  llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
  if (auto *Clause = S.getSingleClause(OMPC_final)) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = cast<OMPFinalClause>(Clause)->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Final.setInt(CondConstant);
    else
      Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Final.setInt(/*IntVal=*/false);
  }
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final,
                                      OutlinedFn, SharedsTy, CapturedStruct);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) {
  llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet.");
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) {
      auto FlushClause = cast<OMPFlushClause>(C);
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) {
  llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
}

static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
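    // A scalar promoted to complex takes the converted value as the real
    // part and zero as the imaginary part.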
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal =
        CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType);
  }
  return ComplexVal;
}

static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(XLValue, Loc,
                                        IsSeqCst ? llvm::SequentiallyConsistent
                                                 : llvm::Monotonic,
                                        XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  switch (CGF.getEvaluationKind(V->getType())) {
  case TEK_Scalar:
    CGF.EmitStoreOfScalar(
        convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue);
    break;
  case TEK_Complex:
    CGF.EmitStoreOfComplex(
        convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  if (XLValue.isGlobalReg())
    CGF.EmitStoreThroughGlobalRegLValue(ExprRValue, XLValue);
  else
    CGF.EmitAtomicStore(ExprRValue, XLValue,
                        IsSeqCst ? llvm::SequentiallyConsistent
                                 : llvm::Monotonic,
                        XLValue.isVolatile(), /*IsInit=*/false);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static Optional<llvm::AtomicRMWInst::BinOp>
getCompatibleAtomicRMWBinOp(ASTContext &Context, BinaryOperatorKind Op,
                            bool IsXLHSInRHSPart, LValue XLValue,
                            RValue ExprRValue) {
  Optional<llvm::AtomicRMWInst::BinOp> RMWOp;
  // Allow atomicrmw only if 'x' and 'expr' are integer values, the lvalue for
  // the 'x' expression is simple, and atomic operations are supported for the
  // given type on the target platform.
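  // For example, an integral 'x += expr' under '#pragma omp atomic update'
  // can then be lowered to a single 'atomicrmw add' instruction.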
  if (ExprRValue.isScalar() &&
      ExprRValue.getScalarVal()->getType()->isIntegerTy() &&
      XLValue.isSimple() &&
      (isa<llvm::ConstantInt>(ExprRValue.getScalarVal()) ||
       (ExprRValue.getScalarVal()->getType() ==
        XLValue.getAddress()->getType()->getPointerElementType())) &&
      Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(XLValue.getType()),
          Context.toBits(XLValue.getAlignment()))) {
    switch (Op) {
    case BO_Add:
      RMWOp = llvm::AtomicRMWInst::Add;
      break;
    case BO_Sub:
      if (IsXLHSInRHSPart) {
        RMWOp = llvm::AtomicRMWInst::Sub;
      }
      break;
    case BO_And:
      RMWOp = llvm::AtomicRMWInst::And;
      break;
    case BO_Or:
      RMWOp = llvm::AtomicRMWInst::Or;
      break;
    case BO_Xor:
      RMWOp = llvm::AtomicRMWInst::Xor;
      break;
    case BO_Mul:
    case BO_Div:
    case BO_Rem:
    case BO_Shl:
    case BO_Shr:
      break;
    case BO_PtrMemD:
    case BO_PtrMemI:
    case BO_LT:
    case BO_GT:
    case BO_LE:
    case BO_GE:
    case BO_EQ:
    case BO_NE:
    case BO_LAnd:
    case BO_LOr:
    case BO_Assign:
    case BO_MulAssign:
    case BO_DivAssign:
    case BO_RemAssign:
    case BO_AddAssign:
    case BO_SubAssign:
    case BO_ShlAssign:
    case BO_ShrAssign:
    case BO_AndAssign:
    case BO_XorAssign:
    case BO_OrAssign:
    case BO_Comma:
      llvm_unreachable("Unexpected binary operation in 'atomic update'.");
    }
  }
  return RMWOp;
}

static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  const auto &Op =
      getCompatibleAtomicRMWBinOp(CGF.CGM.getContext(), BOUE->getOpcode(),
                                  IsXLHSInRHSPart, XLValue, ExprRValue);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  if (Op) {
    auto *ExprVal = ExprRValue.getScalarVal();
    if (auto *IC = dyn_cast<llvm::ConstantInt>(ExprVal)) {
      ExprVal = CGF.Builder.CreateIntCast(
          IC, XLValue.getAddress()->getType()->getPointerElementType(),
          XLValue.getType()->hasSignedIntegerRepresentation());
    }
    CGF.Builder.CreateAtomicRMW(*Op, XLValue.getAddress(), ExprVal, AO);
  } else {
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    CodeGenFunction::OpaqueValueMapping MapExpr(
        CGF, IsXLHSInRHSPart ? RHS : LHS, ExprRValue);
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    if (XLValue.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      CodeGenFunction::OpaqueValueMapping MapX(
          CGF, XRValExpr, CGF.EmitLoadOfLValue(XLValue, Loc));
      CGF.EmitStoreThroughLValue(CGF.EmitAnyExpr(UE), XLValue);
    } else {
      // Perform compare-and-swap procedure.
      CGF.EmitAtomicUpdate(
          XLValue, AO, [&CGF, &UE, &XRValExpr](RValue XRVal) -> RValue {
            CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRVal);
            return CGF.EmitAnyExpr(UE);
          }, /*IsVolatile=*/false);
    }
  }
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, const Expr *X, const Expr *V,
                              const Expr *E, const Expr *UE,
                              bool IsXLHSInRHSPart, SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    llvm_unreachable("CodeGen for 'omp atomic capture' is not supported yet.");
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_mergeable:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find the first clause (skip the seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS))
    enterFullExpression(EWC);
  InlinedOpenMPRegionScopeRAII Region(*this, S);

  EmitOMPAtomicExpr(*this, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
                    S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart());
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
  llvm_unreachable("CodeGen for 'omp target' is not supported yet.");
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
}