//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
using namespace clang;
using namespace CodeGen;

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//

/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
///   CodeGen(true);
/// } else {
///   CodeGen(false);
/// }
/// If \a Cond constant-folds, only the live arm is emitted.
static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                            const std::function<void(bool)> &CodeGen) {
  // Scope for any temporaries created while evaluating the condition.
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    CodeGen(CondConstant);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then");
  auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else");
  auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  CodeGen(/*ThenBlock*/ true);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  {
    // There is no need to emit line number for unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ElseBlock);
  }
  CodeGen(/*ThenBlock*/ false);
  {
    // There is no need to emit line number for unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBranch(ContBlock);
  }
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished*/ true);
}

/// \brief Emits initialization/copy of an array-typed private variable.
/// For a trivial initializer a plain aggregate copy (memcpy) from the
/// original storage is emitted. Otherwise an element-by-element loop is
/// emitted that walks both arrays backwards from one-past-the-end; while
/// emitting \a AssignExpr for each destination element, \a VDInit is
/// temporarily mapped to the current source element so references to it
/// inside the initializer resolve correctly.
void CodeGenFunction::EmitOMPAggregateAssign(LValue OriginalAddr,
                                             llvm::Value *PrivateAddr,
                                             const Expr *AssignExpr,
                                             QualType OriginalType,
                                             const VarDecl *VDInit) {
  EmitBlock(createBasicBlock(".omp.assign.begin."));
  if (!isa<CXXConstructExpr>(AssignExpr) || isTrivialInitializer(AssignExpr)) {
    // Perform simple memcpy.
    EmitAggregateAssign(PrivateAddr, OriginalAddr.getAddress(),
                        AssignExpr->getType());
  } else {
    // Perform element-by-element initialization.
    QualType ElementTy;
    auto SrcBegin = OriginalAddr.getAddress();
    auto DestBegin = PrivateAddr;
    auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
    auto SrcNumElements = emitArrayLength(ArrayTy, ElementTy, SrcBegin);
    auto DestNumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
    auto SrcEnd = Builder.CreateGEP(SrcBegin, SrcNumElements);
    auto DestEnd = Builder.CreateGEP(DestBegin, DestNumElements);
    // The basic structure here is a do-while loop, because we don't
    // need to check for the zero-element case.
    auto BodyBB = createBasicBlock("omp.arraycpy.body");
    auto DoneBB = createBasicBlock("omp.arraycpy.done");
    // Still guard against an empty array so the do-while is never entered
    // with begin == end.
    auto IsEmpty =
        Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
    Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

    // Enter the loop body, making that address the current address.
    // The PHIs track the "one past the element being processed" pointers;
    // the entry edge seeds them with the end pointers.
    auto EntryBB = Builder.GetInsertBlock();
    EmitBlock(BodyBB);
    auto SrcElementPast = Builder.CreatePHI(SrcBegin->getType(), 2,
                                            "omp.arraycpy.srcElementPast");
    SrcElementPast->addIncoming(SrcEnd, EntryBB);
    auto DestElementPast = Builder.CreatePHI(DestBegin->getType(), 2,
                                             "omp.arraycpy.destElementPast");
    DestElementPast->addIncoming(DestEnd, EntryBB);

    // Shift the address back by one element.
    auto NegativeOne = llvm::ConstantInt::get(SizeTy, -1, true);
    auto DestElement = Builder.CreateGEP(DestElementPast, NegativeOne,
                                         "omp.arraycpy.dest.element");
    auto SrcElement = Builder.CreateGEP(SrcElementPast, NegativeOne,
                                        "omp.arraycpy.src.element");
    {
      // Create RunCleanScope to cleanup possible temps.
      CodeGenFunction::RunCleanupsScope Init(*this);
      // Emit initialization for single element. Temporarily point VDInit at
      // the current source element for the duration of the initializer.
      LocalDeclMap[VDInit] = SrcElement;
      EmitAnyExprToMem(AssignExpr, DestElement,
                       AssignExpr->getType().getQualifiers(),
                       /*IsInitializer*/ false);
      LocalDeclMap.erase(VDInit);
    }

    // Check whether we've reached the end (walking backwards, so "end" is
    // the begin pointer). The back-edge PHI inputs are registered after the
    // branch, using the block that actually contains it.
    auto Done =
        Builder.CreateICmpEQ(DestElement, DestBegin, "omp.arraycpy.done");
    Builder.CreateCondBr(Done, DoneBB, BodyBB);
    DestElementPast->addIncoming(DestElement, Builder.GetInsertBlock());
    SrcElementPast->addIncoming(SrcElement, Builder.GetInsertBlock());

    // Done.
    EmitBlock(DoneBB, true);
  }
  EmitBlock(createBasicBlock(".omp.assign.end."));
}

/// \brief Emits private copies for variables of 'firstprivate' clauses and
/// registers them in \a PrivateScope. Variables that carry an init
/// declaration in the clause (arrays) are copied element-wise from the
/// captured original via EmitOMPAggregateAssign; all others are emitted as
/// ordinary local declarations with copy-initialization.
void CodeGenFunction::EmitOMPFirstprivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_firstprivate;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter); I; ++I) {
    auto *C = cast<OMPFirstprivateClause>(*I);
    // IRef walks the original variable references in parallel with the
    // private copies; InitsRef walks the per-variable init declarations.
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      bool IsRegistered;
      if (*InitsRef != nullptr) {
        // Emit VarDecl with copy init for arrays.
        // Look up the captured field for the original and copy from it.
        auto *FD = CapturedStmtInfo->lookup(OrigVD);
        LValue Base = MakeNaturalAlignAddrLValue(
            CapturedStmtInfo->getContextValue(),
            getContext().getTagDeclType(FD->getParent()));
        auto OriginalAddr = EmitLValueForField(Base, FD);
        auto VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
          auto Emission = EmitAutoVarAlloca(*VD);
          // Emit initialization of aggregate firstprivate vars.
          EmitOMPAggregateAssign(OriginalAddr, Emission.getAllocatedAddress(),
                                 VD->getInit(), (*IRef)->getType(), VDInit);
          EmitAutoVarCleanups(Emission);
          return Emission.getAllocatedAddress();
        });
      } else
        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
      assert(IsRegistered && "counter already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++IRef, ++InitsRef;
    }
  }
}

/// \brief Emits uninitialized private copies for variables of 'private'
/// clauses and registers them in \a PrivateScope.
void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_private;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter); I; ++I) {
    auto *C = cast<OMPPrivateClause>(*I);
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      bool IsRegistered =
          PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
      assert(IsRegistered && "counter already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++IRef;
    }
  }
}

/// \brief Emits code for OpenMP parallel directive in the parallel region.
static void EmitOMPParallelCall(CodeGenFunction &CGF,
                                const OMPParallelDirective &S,
                                llvm::Value *OutlinedFn,
                                llvm::Value *CapturedStruct) {
  // If a num_threads clause is present, evaluate it and inform the runtime
  // before emitting the actual parallel call.
  if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedStruct);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Outline the associated captured statement into a function callable by
  // the OpenMP runtime, passing the captured variables as a struct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto OutlinedFn = CGM.getOpenMPRuntime().emitOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin());
  if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
    // With an 'if' clause, emit a parallel call on the true branch and a
    // serialized call on the false branch.
    auto Cond = cast<OMPIfClause>(C)->getCondition();
    EmitOMPIfClause(*this, Cond, [&](bool ThenBlock) {
      if (ThenBlock)
        EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
      else
        CGM.getOpenMPRuntime().emitSerialCall(*this, S.getLocStart(),
                                              OutlinedFn, CapturedStruct);
    });
  } else
    EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
}

/// \brief Emits one iteration's worth of body code for an OpenMP loop
/// directive: the counter update expressions followed by the loop body,
/// with a 'continue' destination that skips to the iteration's end.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
                                      bool SeparateIter) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (auto I : S.updates()) {
    EmitIgnoredExpr(I);
  }
  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue));
  // Emit loop body.
  EmitStmt(S.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (SeparateIter) {
    // TODO: Update lastprivates if the SeparateIter flag is true.
    // This will be implemented in a follow-up OMPLastprivateClause patch, but
    // result should be still correct without it, as we do not make these
    // variables private yet.
  }
}

/// \brief Emits the inner ("while (IV < UB)") loop of an OpenMP loop
/// directive: a condition block, the body with a 'continue' destination at
/// the increment, the increment itself, and a back-edge to the condition.
void CodeGenFunction::EmitOMPInnerLoop(const OMPLoopDirective &S,
                                       OMPPrivateScope &LoopScope,
                                       bool SeparateIter) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
  auto Cnt = getPGORegionCounter(&S);

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition: "IV < LastIteration + 1 [ - 1]"
  // ("- 1" when lastprivate clause is present - separate one iteration).
  llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond(SeparateIter));
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock,
                       PGO.createLoopWeights(S.getCond(SeparateIter), Cnt));

  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  Cnt.beginRegion(Builder);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  EmitOMPLoopBody(S);
  EmitStopPoint(&S);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(S.getInc());
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

/// \brief Emits the "final" update expressions of the loop counters, but
/// only for counters that currently have a local copy in LocalDeclMap.
void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) {
  auto IC = S.counters().begin();
  for (auto F : S.finals()) {
    if (LocalDeclMap.lookup(cast<DeclRefExpr>((*IC))->getDecl())) {
      EmitIgnoredExpr(F);
    }
    ++IC;
  }
}

/// \brief Emits llvm.assume-style alignment assumptions for the pointers
/// listed in an 'aligned' clause, using the clause's explicit alignment or
/// the target's default SIMD alignment when none is given.
static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM,
                                 const OMPAlignedClause &Clause) {
  unsigned ClauseAlignment = 0;
  if (auto AlignmentExpr = Clause.getAlignment()) {
    // Sema guarantees the alignment is a constant expression here.
    auto AlignmentCI =
        cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
    ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
  }
  for (auto E : Clause.varlists()) {
    unsigned Alignment = ClauseAlignment;
    if (Alignment == 0) {
      // OpenMP [2.8.1, Description]
      // If no optional parameter is specified, implementation-defined default
      // alignments for SIMD instructions on the target platforms are assumed.
      Alignment = CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
          E->getType());
    }
    assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
           "alignment is not power of 2");
    if (Alignment != 0) {
      llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
      CGF.EmitAlignmentAssumption(PtrValue, Alignment);
    }
  }
}

/// \brief Emits uninitialized private copies of the loop counter variables,
/// registers them in \a LoopScope and activates the privatization.
static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters) {
  for (auto *E : Counters) {
    auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    bool IsRegistered = LoopScope.addPrivate(VD, [&]() -> llvm::Value * {
      // Emit var without initialization.
      auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
      CGF.EmitAutoVarCleanups(VarEmission);
      return VarEmission.getAllocatedAddress();
    });
    assert(IsRegistered && "counter already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;
  }
  (void)LoopScope.Privatize();
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  // Pragma 'simd' code depends on presence of 'lastprivate'.
  // If present, we have to separate last iteration of the loop:
  //
  // if (LastIteration != 0) {
  //   for (IV in 0..LastIteration-1) BODY;
  //   BODY with updates of lastprivate vars;
  //   <Final counter/linear vars updates>;
  // }
  //
  // otherwise (when there's no lastprivate):
  //
  // for (IV in 0..LastIteration) BODY;
  // <Final counter/linear vars updates>;
  //

  // Walk clauses and process safelen/lastprivate.
  bool SeparateIter = false;
  LoopStack.setParallel();
  LoopStack.setVectorizerEnable(true);
  for (auto C : S.clauses()) {
    switch (C->getClauseKind()) {
    case OMPC_safelen: {
      RValue Len = EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
                               AggValueSlot::ignored(), true);
      llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
      LoopStack.setVectorizerWidth(Val->getZExtValue());
      // In presence of finite 'safelen', it may be unsafe to mark all
      // the memory instructions parallel, because loop-carried
      // dependences of 'safelen' iterations are possible.
      LoopStack.setParallel(false);
      break;
    }
    case OMPC_aligned:
      EmitOMPAlignedClause(*this, CGM, cast<OMPAlignedClause>(*C));
      break;
    case OMPC_lastprivate:
      SeparateIter = true;
      break;
    default:
      // Not handled yet
      ;
    }
  }

  InlinedOpenMPRegionRAII Region(*this, S);
  RunCleanupsScope DirectiveScope(*this);

  CGDebugInfo *DI = getDebugInfo();
  if (DI)
    DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin());

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  EmitVarDecl(*IVDecl);
  EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  if (SeparateIter) {
    // Emit: if (LastIteration > 0) - begin.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("simd.if.then");
    auto ContBlock = createBasicBlock("simd.if.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code: the loop minus its last iteration, then the last
    // iteration's body separately.
    {
      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      EmitOMPInnerLoop(S, LoopScope, /* SeparateIter */ true);
      EmitOMPLoopBody(S, /* SeparateIter */ true);
    }
    EmitOMPSimdFinal(S);
    // Emit: if (LastIteration != 0) - end.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  } else {
    {
      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      EmitOMPInnerLoop(S, LoopScope);
    }
    EmitOMPSimdFinal(S);
  }

  if (DI)
    DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
}

/// \brief Emits the outer (dispatch) loop for worksharing-loop schedules
/// that hand out iteration chunks one at a time; currently only static
/// chunked schedules are supported (dynamic schedules are rejected).
void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                          const OMPLoopDirective &S,
                                          OMPPrivateScope &LoopScope,
                                          llvm::Value *LB, llvm::Value *UB,
                                          llvm::Value *ST, llvm::Value *IL,
                                          llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();
  assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
         "static non-chunked schedule does not need outer loop");
  if (RT.isDynamic(ScheduleKind)) {
    ErrorUnsupported(&S, "OpenMP loop with dynamic schedule");
    return;
  }

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
                 UB, ST, Chunk);
  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  // UB = min(UB, GlobalUB)
  EmitIgnoredExpr(S.getEnsureUpperBound());
  // IV = LB
  EmitIgnoredExpr(S.getInit());
  // IV < UB
  BoolCondVal = EvaluateExprAsBool(S.getCond(false));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Process the current chunk with the inner loop.
  EmitOMPInnerLoop(S, LoopScope);

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  // Emit "LB = LB + Stride", "UB = UB + Stride".
  EmitIgnoredExpr(S.getNextLowerBound());
  EmitIgnoredExpr(S.getNextUpperBound());

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
}

/// \brief Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

/// \brief Emits a worksharing loop: iteration variable and trip count, the
/// precondition guard, the runtime bookkeeping variables (LB/UB/ST/IL),
/// private loop counters, and either the simple static-nonchunked form or
/// the chunk-dispatching outer loop depending on the schedule clause.
void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Check pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("omp.precond.then");
    auto ContBlock = createBasicBlock("omp.precond.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());

      // Detect the loop schedule kind and chunk.
      auto ScheduleKind = OMPC_SCHEDULE_unknown;
      llvm::Value *Chunk = nullptr;
      if (auto C = cast_or_null<OMPScheduleClause>(
              S.getSingleClause(OMPC_schedule))) {
        ScheduleKind = C->getScheduleKind();
        if (auto Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          // Normalize the chunk to the iteration variable's type.
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk is
        // distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                       IL.getAddress(), LB.getAddress(), UB.getAddress(),
                       ST.getAddress());
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope);
        // Tell the runtime we are done.
        RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
                            UB.getAddress(), ST.getAddress(), IL.getAddress(),
                            Chunk);
      }
    }
    // We're now done with the loop, so jump to the continuation block.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  }
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  InlinedOpenMPRegionRAII Region(*this, S);
  RunCleanupsScope DirectiveScope(*this);

  CGDebugInfo *DI = getDebugInfo();
  if (DI)
    DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin());

  EmitOMPWorksharingLoop(S);

  // Emit an implicit barrier at the end.
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                         /*IsExplicit*/ false);
  if (DI)
    DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &) {
  llvm_unreachable("CodeGen for 'omp sections' is not supported yet.");
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &) {
  llvm_unreachable("CodeGen for 'omp section' is not supported yet.");
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  // Delegate to the runtime; the lambda emits the region body inline.
  CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void {
    InlinedOpenMPRegionRAII Region(*this, S);
    RunCleanupsScope Scope(*this);
    EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    EnsureInsertPoint();
  }, S.getLocStart());
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  // Delegate to the runtime; the lambda emits the region body inline.
  CGM.getOpenMPRuntime().emitMasterRegion(*this, [&]() -> void {
    InlinedOpenMPRegionRAII Region(*this, S);
    RunCleanupsScope Scope(*this);
    EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    EnsureInsertPoint();
  }, S.getLocStart());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  // The critical region is identified by its (possibly empty) name.
  CGM.getOpenMPRuntime().emitCriticalRegion(
      *this, S.getDirectiveName().getAsString(), [&]() -> void {
        InlinedOpenMPRegionRAII Region(*this, S);
        RunCleanupsScope Scope(*this);
        EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
        EnsureInsertPoint();
      }, S.getLocStart());
}

void
CodeGenFunction::EmitOMPParallelForDirective(const OMPParallelForDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for' is not supported yet.");
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet.");
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet.");
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &) {
  llvm_unreachable("CodeGen for 'omp task' is not supported yet.");
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) {
  llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet.");
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  // Pass the flush variable list (empty if no 'flush' clause) to the
  // runtime; the immediately-invoked lambda builds the ArrayRef.
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) {
      auto FlushClause = cast<OMPFlushClause>(C);
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) {
  llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
}

/// \brief Converts an rvalue (scalar or complex) to a value of the given
/// scalar destination type.
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType);
}

/// \brief Converts an rvalue (scalar or complex) to a value of the given
/// complex destination type; a scalar becomes the real part with a zero
/// imaginary part.
static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal =
        CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType);
  }
  return ComplexVal;
}

/// \brief Emits 'v = x;' for an 'omp atomic read': an atomic load of x
/// (unless x is a global register), an optional implicit flush for seq_cst,
/// then a store of the (converted) value into v.
static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() &&
"X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  // Global-register lvalues cannot be loaded atomically; fall back to a
  // plain load for them.
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(XLValue, Loc,
                                        IsSeqCst ? llvm::SequentiallyConsistent
                                                 : llvm::Monotonic,
                                        XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  // Store the loaded value into v, converting to v's evaluation kind.
  switch (CGF.getEvaluationKind(V->getType())) {
  case TEK_Scalar:
    CGF.EmitStoreOfScalar(
        convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue);
    break;
  case TEK_Complex:
    CGF.EmitStoreOfComplex(
        convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

/// \brief Emits 'x = expr;' for an 'omp atomic write': an atomic store into
/// x (unless x is a global register), followed by an optional implicit
/// flush for seq_cst.
static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  if (XLValue.isGlobalReg())
    CGF.EmitStoreThroughGlobalRegLValue(ExprRValue, XLValue);
  else
    CGF.EmitAtomicStore(ExprRValue, XLValue,
                        IsSeqCst ? llvm::SequentiallyConsistent
                                 : llvm::Monotonic,
                        XLValue.isVolatile(), /*IsInit=*/false);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

/// \brief Dispatches an 'omp atomic' directive to the handler for its
/// clause kind (read/write supported; update/capture not yet).
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, const Expr *X, const Expr *V,
                              const Expr *E, SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_update:
  case OMPC_capture:
    llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet.");
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_mergeable:
  case OMPC_unknown:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  // Presence of a seq_cst clause (pointer-to-bool conversion).
  bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find first clause (skip seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }
  EmitOMPAtomicExpr(*this, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
                    S.getLocStart());
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
  llvm_unreachable("CodeGen for 'omp target' is not supported yet.");
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
}