//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// \brief RAII for emitting code of CapturedStmt without function outlining.
class InlinedOpenMPRegion {
  CodeGenFunction &CGF;
  CodeGenFunction::CGCapturedStmtInfo *PrevCapturedStmtInfo;
  const Decl *StoredCurCodeDecl;

  /// \brief A class to emit CapturedStmt construct as inlined statement
  /// without generating a function for outlined code.
  class CGInlinedOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
  public:
    CGInlinedOpenMPRegionInfo() : CGCapturedStmtInfo() {}
  };

public:
  InlinedOpenMPRegion(CodeGenFunction &CGF, const Stmt *S)
      : CGF(CGF), PrevCapturedStmtInfo(CGF.CapturedStmtInfo),
        StoredCurCodeDecl(CGF.CurCodeDecl) {
    CGF.CurCodeDecl = cast<CapturedStmt>(S)->getCapturedDecl();
    CGF.CapturedStmtInfo = new CGInlinedOpenMPRegionInfo();
  }
  ~InlinedOpenMPRegion() {
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = PrevCapturedStmtInfo;
    CGF.CurCodeDecl = StoredCurCodeDecl;
  }
};
} // namespace

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//

/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
///   CodeGen(true);
/// } else {
///   CodeGen(false);
/// }
static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                            const std::function<void(bool)> &CodeGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    CodeGen(CondConstant);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then");
  auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else");
  auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  CodeGen(/*ThenBlock*/ true);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  {
    // There is no need to emit line number for unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ElseBlock);
  }
  CodeGen(/*ThenBlock*/ false);
  {
    // There is no need to emit line number for unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBranch(ContBlock);
  }
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished*/ true);
}
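
// Example (illustrative comment only): for a directive such as
//
//   #pragma omp parallel if (cond)
//   { /* body */ }
//
// EmitOMPIfClause produces the omp_if.then/omp_if.else diamond above: the
// 'then' arm receives CodeGen(true) (the parallel version of the region) and
// the 'else' arm CodeGen(false) (the serialized version); see
// EmitOMPParallelDirective below for the caller. If 'cond' constant-folds,
// only the live arm is emitted.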

void CodeGenFunction::EmitOMPAggregateAssign(LValue OriginalAddr,
                                             llvm::Value *PrivateAddr,
                                             const Expr *AssignExpr,
                                             QualType OriginalType,
                                             const VarDecl *VDInit) {
  EmitBlock(createBasicBlock(".omp.assign.begin."));
  if (!isa<CXXConstructExpr>(AssignExpr) || isTrivialInitializer(AssignExpr)) {
    // Perform simple memcpy.
    EmitAggregateAssign(PrivateAddr, OriginalAddr.getAddress(),
                        AssignExpr->getType());
  } else {
    // Perform element-by-element initialization.
    QualType ElementTy;
    auto SrcBegin = OriginalAddr.getAddress();
    auto DestBegin = PrivateAddr;
    auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
    auto SrcNumElements = emitArrayLength(ArrayTy, ElementTy, SrcBegin);
    auto DestNumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
    auto SrcEnd = Builder.CreateGEP(SrcBegin, SrcNumElements);
    auto DestEnd = Builder.CreateGEP(DestBegin, DestNumElements);
    // The basic structure here is a do-while loop, because we don't
    // need to check for the zero-element case.
    auto BodyBB = createBasicBlock("omp.arraycpy.body");
    auto DoneBB = createBasicBlock("omp.arraycpy.done");
    auto IsEmpty =
        Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
    Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

    // Enter the loop body, making that address the current address.
    auto EntryBB = Builder.GetInsertBlock();
    EmitBlock(BodyBB);
    auto SrcElementPast = Builder.CreatePHI(SrcBegin->getType(), 2,
                                            "omp.arraycpy.srcElementPast");
    SrcElementPast->addIncoming(SrcEnd, EntryBB);
    auto DestElementPast = Builder.CreatePHI(DestBegin->getType(), 2,
                                             "omp.arraycpy.destElementPast");
    DestElementPast->addIncoming(DestEnd, EntryBB);

    // Shift the address back by one element.
    auto NegativeOne = llvm::ConstantInt::get(SizeTy, -1, true);
    auto DestElement = Builder.CreateGEP(DestElementPast, NegativeOne,
                                         "omp.arraycpy.dest.element");
    auto SrcElement = Builder.CreateGEP(SrcElementPast, NegativeOne,
                                        "omp.arraycpy.src.element");
    {
      // Create a RunCleanupsScope to clean up possible temps.
      CodeGenFunction::RunCleanupsScope Init(*this);
      // Emit initialization for a single element.
      LocalDeclMap[VDInit] = SrcElement;
      EmitAnyExprToMem(AssignExpr, DestElement,
                       AssignExpr->getType().getQualifiers(),
                       /*IsInitializer*/ false);
      LocalDeclMap.erase(VDInit);
    }

    // Check whether we've reached the end.
    auto Done =
        Builder.CreateICmpEQ(DestElement, DestBegin, "omp.arraycpy.done");
    Builder.CreateCondBr(Done, DoneBB, BodyBB);
    DestElementPast->addIncoming(DestElement, Builder.GetInsertBlock());
    SrcElementPast->addIncoming(SrcElement, Builder.GetInsertBlock());

    // Done.
    EmitBlock(DoneBB, true);
  }
  EmitBlock(createBasicBlock(".omp.assign.end."));
}
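
// Example (illustrative only): for an array of non-trivially-copyable
// element type, e.g.
//
//   S a[10];
//   #pragma omp parallel firstprivate(a)
//
// the element-by-element branch above produces, roughly, a bottom-tested
// loop guarded by the initial 'omp.arraycpy.isempty' check:
//
//   omp.arraycpy.body:                ; PHIs step both arrays backwards
//     <copy-initialize one element>
//     br i1 %done, label %omp.arraycpy.done, label %omp.arraycpy.body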

void CodeGenFunction::EmitOMPFirstprivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_firstprivate;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter); I; ++I) {
    auto *C = cast<OMPFirstprivateClause>(*I);
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      bool IsRegistered;
      if (*InitsRef != nullptr) {
        // Emit VarDecl with copy init for arrays.
        auto *FD = CapturedStmtInfo->lookup(OrigVD);
        LValue Base = MakeNaturalAlignAddrLValue(
            CapturedStmtInfo->getContextValue(),
            getContext().getTagDeclType(FD->getParent()));
        auto OriginalAddr = EmitLValueForField(Base, FD);
        auto VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
          auto Emission = EmitAutoVarAlloca(*VD);
          // Emit initialization of aggregate firstprivate vars.
          EmitOMPAggregateAssign(OriginalAddr, Emission.getAllocatedAddress(),
                                 VD->getInit(), (*IRef)->getType(), VDInit);
          EmitAutoVarCleanups(Emission);
          return Emission.getAllocatedAddress();
        });
      } else
        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
      assert(IsRegistered && "firstprivate var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++IRef, ++InitsRef;
    }
  }
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_private;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter); I; ++I) {
    auto *C = cast<OMPPrivateClause>(*I);
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      bool IsRegistered =
          PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
      assert(IsRegistered && "private var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++IRef;
    }
  }
}
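
// Example (illustrative only): given
//
//   #pragma omp parallel private(a) firstprivate(b)
//
// the two routines above register region-local allocas for 'a' and 'b' in
// the OMPPrivateScope: the private copy of 'a' gets only default
// initialization, while the copy of 'b' is copy-initialized from the
// original captured at the directive (arrays go through
// EmitOMPAggregateAssign above).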

/// \brief Emits code for OpenMP parallel directive in the parallel region.
static void EmitOMPParallelCall(CodeGenFunction &CGF,
                                const OMPParallelDirective &S,
                                llvm::Value *OutlinedFn,
                                llvm::Value *CapturedStruct) {
  if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().EmitOMPNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  CGF.CGM.getOpenMPRuntime().EmitOMPParallelCall(CGF, S.getLocStart(),
                                                 OutlinedFn, CapturedStruct);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto OutlinedFn = CGM.getOpenMPRuntime().EmitOpenMPOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin());
  if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
    auto Cond = cast<OMPIfClause>(C)->getCondition();
    EmitOMPIfClause(*this, Cond, [&](bool ThenBlock) {
      if (ThenBlock)
        EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
      else
        CGM.getOpenMPRuntime().EmitOMPSerialCall(*this, S.getLocStart(),
                                                 OutlinedFn, CapturedStruct);
    });
  } else
    EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
}
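
// Example (illustrative only; the runtime calls themselves are emitted by
// CGOpenMPRuntime): for
//
//   #pragma omp parallel num_threads(4)
//   { /* body */ }
//
// the region body is outlined into a separate function and, with the libomp
// entry points, the emission above lowers to roughly
//
//   call void @__kmpc_push_num_threads(..., i32 4)
//   call void @__kmpc_fork_call(..., @.omp_outlined., %captured.struct*)
//
// while the 'else' arm of an 'if' clause invokes the same outlined function
// serially on the encountering thread (EmitOMPSerialCall).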

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
                                      bool SeparateIter) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (auto I : S.updates()) {
    EmitIgnoredExpr(I);
  }
  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue));
  // Emit loop body.
  EmitStmt(S.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (SeparateIter) {
    // TODO: Update lastprivates if the SeparateIter flag is true.
    // This will be implemented in a follow-up OMPLastprivateClause patch, but
    // result should be still correct without it, as we do not make these
    // variables private yet.
  }
}

void CodeGenFunction::EmitOMPInnerLoop(const OMPLoopDirective &S,
                                       OMPPrivateScope &LoopScope,
                                       bool SeparateIter) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
  auto Cnt = getPGORegionCounter(&S);

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition: "IV < LastIteration + 1 [ - 1]"
  // ("- 1" when lastprivate clause is present - separate one iteration).
  llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond(SeparateIter));
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock,
                       PGO.createLoopWeights(S.getCond(SeparateIter), Cnt));

  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  Cnt.beginRegion(Builder);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  EmitOMPLoopBody(S);
  EmitStopPoint(&S);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(S.getInc());
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) {
  auto IC = S.counters().begin();
  for (auto F : S.finals()) {
    if (LocalDeclMap.lookup(cast<DeclRefExpr>((*IC))->getDecl())) {
      EmitIgnoredExpr(F);
    }
    ++IC;
  }
}

static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM,
                                 const OMPAlignedClause &Clause) {
  unsigned ClauseAlignment = 0;
  if (auto AlignmentExpr = Clause.getAlignment()) {
    auto AlignmentCI =
        cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
    ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
  }
  for (auto E : Clause.varlists()) {
    unsigned Alignment = ClauseAlignment;
    if (Alignment == 0) {
      // OpenMP [2.8.1, Description]
      // If no optional parameter is specified, implementation-defined default
      // alignments for SIMD instructions on the target platforms are assumed.
      Alignment = CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
          E->getType());
    }
    assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
           "alignment is not a power of 2");
    if (Alignment != 0) {
      llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
      CGF.EmitAlignmentAssumption(PtrValue, Alignment);
    }
  }
}
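
// Example (illustrative only): for
//
//   #pragma omp simd aligned(p : 32)
//
// EmitOMPAlignedClause above emits an alignment assumption on 'p' (an
// @llvm.assume over the low pointer bits; see EmitAlignmentAssumption for
// the exact IR shape) that the vectorizer can later exploit.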

static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters) {
  for (auto *E : Counters) {
    auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    bool IsRegistered = LoopScope.addPrivate(VD, [&]() -> llvm::Value * {
      // Emit var without initialization.
      auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
      CGF.EmitAutoVarCleanups(VarEmission);
      return VarEmission.getAllocatedAddress();
    });
    assert(IsRegistered && "counter already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;
  }
  (void)LoopScope.Privatize();
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  // Pragma 'simd' code depends on presence of 'lastprivate'.
  // If present, we have to separate last iteration of the loop:
  //
  // if (LastIteration != 0) {
  //   for (IV in 0..LastIteration-1) BODY;
  //   BODY with updates of lastprivate vars;
  //   <Final counter/linear vars updates>;
  // }
  //
  // otherwise (when there's no lastprivate):
  //
  // for (IV in 0..LastIteration) BODY;
  // <Final counter/linear vars updates>;
  //

  // Walk clauses and process safelen/lastprivate.
  bool SeparateIter = false;
  LoopStack.setParallel();
  LoopStack.setVectorizerEnable(true);
  for (auto C : S.clauses()) {
    switch (C->getClauseKind()) {
    case OMPC_safelen: {
      RValue Len = EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
                               AggValueSlot::ignored(), true);
      llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
      LoopStack.setVectorizerWidth(Val->getZExtValue());
      // In presence of finite 'safelen', it may be unsafe to mark all
      // the memory instructions parallel, because loop-carried
      // dependences of 'safelen' iterations are possible.
      LoopStack.setParallel(false);
      break;
    }
    case OMPC_aligned:
      EmitOMPAlignedClause(*this, CGM, cast<OMPAlignedClause>(*C));
      break;
    case OMPC_lastprivate:
      SeparateIter = true;
      break;
    default:
      // Not handled yet.
      ;
    }
  }

  InlinedOpenMPRegion Region(*this, S.getAssociatedStmt());
  RunCleanupsScope DirectiveScope(*this);

  CGDebugInfo *DI = getDebugInfo();
  if (DI)
    DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin());

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  EmitVarDecl(*IVDecl);
  EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  if (SeparateIter) {
    // Emit: if (LastIteration != 0) - begin.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("simd.if.then");
    auto ContBlock = createBasicBlock("simd.if.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      EmitOMPInnerLoop(S, LoopScope, /* SeparateIter */ true);
      EmitOMPLoopBody(S, /* SeparateIter */ true);
    }
    EmitOMPSimdFinal(S);
    // Emit: if (LastIteration != 0) - end.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  } else {
    {
      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      EmitOMPInnerLoop(S, LoopScope);
    }
    EmitOMPSimdFinal(S);
  }

  if (DI)
    DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
}
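
// Example (illustrative only): for
//
//   #pragma omp simd safelen(8)
//   for (...) { ... }
//
// the emitted loop is annotated through LoopStack with vectorizer hints
// (e.g. a vectorize-width of 8 in the loop metadata) but is not marked
// parallel, since iterations more than 8 apart may still carry dependences.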

void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                          const OMPLoopDirective &S,
                                          OMPPrivateScope &LoopScope,
                                          llvm::Value *LB, llvm::Value *UB,
                                          llvm::Value *ST, llvm::Value *IL,
                                          llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();
  assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
         "static non-chunked schedule does not need outer loop");
  if (RT.isDynamic(ScheduleKind)) {
    ErrorUnsupported(&S, "OpenMP loop with dynamic schedule");
    return;
  }

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the
  // threads in the team in a round-robin fashion in the order of the thread
  // number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  RT.EmitOMPForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL,
                    LB, UB, ST, Chunk);
  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  // UB = min(UB, GlobalUB)
  EmitIgnoredExpr(S.getEnsureUpperBound());
  // IV = LB
  EmitIgnoredExpr(S.getInit());
  // IV < UB
  BoolCondVal = EvaluateExprAsBool(S.getCond(false));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  EmitOMPInnerLoop(S, LoopScope);

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  // Emit "LB = LB + Stride", "UB = UB + Stride".
  EmitIgnoredExpr(S.getNextLowerBound());
  EmitIgnoredExpr(S.getNextUpperBound());

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  RT.EmitOMPForFinish(*this, S.getLocStart(), ScheduleKind);
}

/// \brief Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}
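
// Example (illustrative only; the exact entry points live in
// CGOpenMPRuntime): a statically scheduled worksharing loop such as
//
//   #pragma omp for schedule(static)
//   for (int i = 0; i < N; ++i) ...
//
// asks the runtime once for this thread's bounds (with libomp, roughly a
// __kmpc_for_static_init_4/_8 call filling IL/LB/UB/ST), runs the inner
// loop over [LB, UB], and finishes with __kmpc_for_static_fini. Chunked
// static schedules instead iterate the dispatch blocks emitted in
// EmitOMPForOuterLoop above.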

void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Check pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("omp.precond.then");
    auto ContBlock = createBasicBlock("omp.precond.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());

      // Detect the loop schedule kind and chunk.
      auto ScheduleKind = OMPC_SCHEDULE_unknown;
      llvm::Value *Chunk = nullptr;
      if (auto C = cast_or_null<OMPScheduleClause>(
              S.getSingleClause(OMPC_schedule))) {
        ScheduleKind = C->getScheduleKind();
        if (auto Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided
        // into chunks that are approximately equal in size, and at most one
        // chunk is distributed to each thread. Note that the size of the
        // chunks is unspecified in this case.
        RT.EmitOMPForInit(*this, S.getLocStart(), ScheduleKind, IVSize,
                          IVSigned, IL.getAddress(), LB.getAddress(),
                          UB.getAddress(), ST.getAddress());
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope);
        // Tell the runtime we are done.
        RT.EmitOMPForFinish(*this, S.getLocStart(), ScheduleKind);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
                            UB.getAddress(), ST.getAddress(), IL.getAddress(),
                            Chunk);
      }
    }
    // We're now done with the loop, so jump to the continuation block.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  }
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  InlinedOpenMPRegion Region(*this, S.getAssociatedStmt());
  RunCleanupsScope DirectiveScope(*this);

  CGDebugInfo *DI = getDebugInfo();
  if (DI)
    DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin());

  EmitOMPWorksharingLoop(S);

  // Emit an implicit barrier at the end.
  CGM.getOpenMPRuntime().EmitOMPBarrierCall(*this, S.getLocStart(),
                                            /*IsExplicit*/ false);
  if (DI)
    DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
}
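
// Note (illustrative only): OpenMP requires an implicit barrier at the end
// of a worksharing region, so EmitOMPForDirective above always emits a
// barrier call (with libomp, typically __kmpc_barrier); a 'nowait' clause
// is not yet special-cased here.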

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &) {
  llvm_unreachable("CodeGen for 'omp sections' is not supported yet.");
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &) {
  llvm_unreachable("CodeGen for 'omp section' is not supported yet.");
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  CGM.getOpenMPRuntime().EmitOMPSingleRegion(*this, [&]() -> void {
    InlinedOpenMPRegion Region(*this, S.getAssociatedStmt());
    RunCleanupsScope Scope(*this);
    EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    EnsureInsertPoint();
  }, S.getLocStart());
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  CGM.getOpenMPRuntime().EmitOMPMasterRegion(*this, [&]() -> void {
    InlinedOpenMPRegion Region(*this, S.getAssociatedStmt());
    RunCleanupsScope Scope(*this);
    EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    EnsureInsertPoint();
  }, S.getLocStart());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  CGM.getOpenMPRuntime().EmitOMPCriticalRegion(
      *this, S.getDirectiveName().getAsString(), [&]() -> void {
        InlinedOpenMPRegion Region(*this, S.getAssociatedStmt());
        RunCleanupsScope Scope(*this);
        EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
        EnsureInsertPoint();
      }, S.getLocStart());
}

void
CodeGenFunction::EmitOMPParallelForDirective(const OMPParallelForDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for' is not supported yet.");
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet.");
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet.");
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &) {
  llvm_unreachable("CodeGen for 'omp task' is not supported yet.");
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().EmitOMPTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().EmitOMPBarrierCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) {
  llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet.");
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().EmitOMPFlush(
      *this, [&]() -> ArrayRef<const Expr *> {
        if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) {
          auto FlushClause = cast<OMPFlushClause>(C);
          return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                    FlushClause->varlist_end());
        }
        return llvm::None;
      }(),
      S.getLocStart());
}
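
// Example (illustrative only): both '#pragma omp flush' and
// '#pragma omp flush (a, b)' reach EmitOMPFlushDirective above; the optional
// variable list is taken from the pseudo 'flush' clause, and with libomp the
// lowering is typically a __kmpc_flush call.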

void
CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) {
  llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
}

static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal =
        CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType);
  }
  return ComplexVal;
}
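
// Example (illustrative only): for
//
//   #pragma omp atomic read seq_cst
//   v = x;
//
// EmitOMPAtomicReadExpr below loads 'x' atomically, emits the implicit
// flush required by 'seq_cst', converts the result to the type of 'v' using
// the helpers above, and stores it to 'v' with a plain store.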

static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg() ? CGF.EmitLoadOfLValue(XLValue, Loc)
                                     : CGF.EmitAtomicLoad(XLValue, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().EmitOMPFlush(CGF, llvm::None, Loc);
  switch (CGF.getEvaluationKind(V->getType())) {
  case TEK_Scalar:
    CGF.EmitStoreOfScalar(
        convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue);
    break;
  case TEK_Complex:
    CGF.EmitStoreOfComplex(
        convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, const Expr *X, const Expr *V,
                              const Expr *, SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
  case OMPC_update:
  case OMPC_capture:
    llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet.");
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_mergeable:
  case OMPC_unknown:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find first clause (skip seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }
  EmitOMPAtomicExpr(*this, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
                    S.getLocStart());
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
  llvm_unreachable("CodeGen for 'omp target' is not supported yet.");
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
}