1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CodeGenModule.h"
17 #include "TargetInfo.h"
18 #include "clang/AST/Stmt.h"
19 #include "clang/AST/StmtOpenMP.h"
20 using namespace clang;
21 using namespace CodeGen;
22 
23 //===----------------------------------------------------------------------===//
24 //                              OpenMP Directive Emission
25 //===----------------------------------------------------------------------===//
26 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
27 /// function. Here is the logic:
28 /// if (Cond) {
29 ///   CodeGen(true);
30 /// } else {
31 ///   CodeGen(false);
32 /// }
33 static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
34                             const std::function<void(bool)> &CodeGen) {
35   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
36 
37   // If the condition constant folds and can be elided, try to avoid emitting
38   // the condition and the dead arm of the if/else.
39   bool CondConstant;
40   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
41     CodeGen(CondConstant);
42     return;
43   }
44 
45   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
46   // emit the conditional branch.
47   auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then");
48   auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else");
49   auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end");
50   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0);
51 
52   // Emit the 'then' code.
53   CGF.EmitBlock(ThenBlock);
54   CodeGen(/*ThenBlock*/ true);
55   CGF.EmitBranch(ContBlock);
56   // Emit the 'else' code if present.
57   {
58     // There is no need to emit line number for unconditional branch.
59     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
60     CGF.EmitBlock(ElseBlock);
61   }
62   CodeGen(/*ThenBlock*/ false);
63   {
64     // There is no need to emit line number for unconditional branch.
65     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
66     CGF.EmitBranch(ContBlock);
67   }
68   // Emit the continuation block for code after the if.
69   CGF.EmitBlock(ContBlock, /*IsFinished*/ true);
70 }
71 
/// \brief Copies/initializes the private copy at \p PrivateAddr from the
/// original variable \p OriginalAddr, bracketed by ".omp.assign.begin." and
/// ".omp.assign.end." blocks.  A plain aggregate copy is used unless the
/// element type requires construction, in which case an element-by-element
/// initialization loop is emitted.
///
/// \param OriginalAddr LValue of the original (shared) variable.
/// \param PrivateAddr  Address of the already-allocated private copy.
/// \param AssignExpr   Initializer emitted for each element (a
///                     CXXConstructExpr in the non-trivial path).
/// \param OriginalType Type of the original variable; treated as an array
///                     type in the element-by-element path.
/// \param VDInit       Implicit variable through which AssignExpr reads the
///                     source element; temporarily remapped per element.
void CodeGenFunction::EmitOMPAggregateAssign(LValue OriginalAddr,
                                             llvm::Value *PrivateAddr,
                                             const Expr *AssignExpr,
                                             QualType OriginalType,
                                             const VarDecl *VDInit) {
  EmitBlock(createBasicBlock(".omp.assign.begin."));
  if (!isa<CXXConstructExpr>(AssignExpr) || isTrivialInitializer(AssignExpr)) {
    // Perform simple memcpy.
    EmitAggregateAssign(PrivateAddr, OriginalAddr.getAddress(),
                        AssignExpr->getType());
  } else {
    // Perform element-by-element initialization.
    QualType ElementTy;
    auto SrcBegin = OriginalAddr.getAddress();
    auto DestBegin = PrivateAddr;
    auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
    // emitArrayLength also updates SrcBegin/DestBegin to element pointers.
    auto SrcNumElements = emitArrayLength(ArrayTy, ElementTy, SrcBegin);
    auto DestNumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
    auto SrcEnd = Builder.CreateGEP(SrcBegin, SrcNumElements);
    auto DestEnd = Builder.CreateGEP(DestBegin, DestNumElements);
    // The basic structure here is a do-while loop, because we don't
    // need to check for the zero-element case.
    auto BodyBB = createBasicBlock("omp.arraycpy.body");
    auto DoneBB = createBasicBlock("omp.arraycpy.done");
    auto IsEmpty =
        Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
    Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

    // Enter the loop body, making that address the current address.
    auto EntryBB = Builder.GetInsertBlock();
    EmitBlock(BodyBB);
    // The PHIs walk the arrays from the end pointers downwards; each
    // iteration decrements them by one element (see GEP with -1 below).
    auto SrcElementPast = Builder.CreatePHI(SrcBegin->getType(), 2,
                                            "omp.arraycpy.srcElementPast");
    SrcElementPast->addIncoming(SrcEnd, EntryBB);
    auto DestElementPast = Builder.CreatePHI(DestBegin->getType(), 2,
                                             "omp.arraycpy.destElementPast");
    DestElementPast->addIncoming(DestEnd, EntryBB);

    // Shift the address back by one element.
    auto NegativeOne = llvm::ConstantInt::get(SizeTy, -1, true);
    auto DestElement = Builder.CreateGEP(DestElementPast, NegativeOne,
                                         "omp.arraycpy.dest.element");
    auto SrcElement = Builder.CreateGEP(SrcElementPast, NegativeOne,
                                        "omp.arraycpy.src.element");
    {
      // Create RunCleanScope to cleanup possible temps.
      CodeGenFunction::RunCleanupsScope Init(*this);
      // Emit initialization for single element.  VDInit is remapped so that
      // AssignExpr reads the current source element's address.
      LocalDeclMap[VDInit] = SrcElement;
      EmitAnyExprToMem(AssignExpr, DestElement,
                       AssignExpr->getType().getQualifiers(),
                       /*IsInitializer*/ false);
      LocalDeclMap.erase(VDInit);
    }

    // Check whether we've reached the end.
    auto Done =
        Builder.CreateICmpEQ(DestElement, DestBegin, "omp.arraycpy.done");
    Builder.CreateCondBr(Done, DoneBB, BodyBB);
    // Back-edge values for the PHIs, added after the branch so the incoming
    // block is the final block of the loop body.
    DestElementPast->addIncoming(DestElement, Builder.GetInsertBlock());
    SrcElementPast->addIncoming(SrcElement, Builder.GetInsertBlock());

    // Done.
    EmitBlock(DoneBB, true);
  }
  EmitBlock(createBasicBlock(".omp.assign.end."));
}
139 
/// \brief Creates private copies, copy-initialized from the original
/// variables, for every variable in the 'firstprivate' clauses of \p D and
/// registers them in \p PrivateScope.
void CodeGenFunction::EmitOMPFirstprivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_firstprivate;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter); I; ++I) {
    auto *C = cast<OMPFirstprivateClause>(*I);
    // The original variable references and the init helpers run in lockstep
    // with the private copies (one entry per clause variable).
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      bool IsRegistered;
      if (*InitsRef != nullptr) {
        // Emit VarDecl with copy init for arrays.
        // The copy source is the field of the enclosing captured statement's
        // context record that holds the original variable.
        auto *FD = CapturedStmtInfo->lookup(OrigVD);
        LValue Base = MakeNaturalAlignAddrLValue(
            CapturedStmtInfo->getContextValue(),
            getContext().getTagDeclType(FD->getParent()));
        auto OriginalAddr = EmitLValueForField(Base, FD);
        auto VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
          auto Emission = EmitAutoVarAlloca(*VD);
          // Emit initialization of aggregate firstprivate vars.
          EmitOMPAggregateAssign(OriginalAddr, Emission.getAllocatedAddress(),
                                 VD->getInit(), (*IRef)->getType(), VDInit);
          EmitAutoVarCleanups(Emission);
          return Emission.getAllocatedAddress();
        });
      } else
        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
      assert(IsRegistered && "firstprivate var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++IRef, ++InitsRef;
    }
  }
}
184 
185 void CodeGenFunction::EmitOMPPrivateClause(
186     const OMPExecutableDirective &D,
187     CodeGenFunction::OMPPrivateScope &PrivateScope) {
188   auto PrivateFilter = [](const OMPClause *C) -> bool {
189     return C->getClauseKind() == OMPC_private;
190   };
191   for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
192            I(D.clauses(), PrivateFilter); I; ++I) {
193     auto *C = cast<OMPPrivateClause>(*I);
194     auto IRef = C->varlist_begin();
195     for (auto IInit : C->private_copies()) {
196       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
197       auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
198       bool IsRegistered =
199           PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
200             // Emit private VarDecl with copy init.
201             EmitDecl(*VD);
202             return GetAddrOfLocalVar(VD);
203           });
204       assert(IsRegistered && "private var already registered as private");
205       // Silence the warning about unused variable.
206       (void)IsRegistered;
207       ++IRef;
208     }
209   }
210 }
211 
/// \brief Sets up privatization for 'reduction' clause variables of \p D:
/// the LHS helper variable is mapped to the address of the original (shared)
/// variable, and the original variable itself is remapped to a fresh private
/// copy emitted with its reduction initializer.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto ReductionFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_reduction;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           ReductionFilter)> I(D.clauses(), ReductionFilter);
       I; ++I) {
    auto *C = cast<OMPReductionClause>(*I);
    // lhs_exprs/rhs_exprs parallel the clause's varlist, one per variable.
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (auto IRef : C->varlists()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> llvm::Value *{
        // Build a reference to the original variable; marked as captured if
        // it lives in the enclosing captured statement's record.
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        IRef->getType(), VK_LValue, IRef->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
      // Emit reduction copy.
      bool IsRegistered =
          PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> llvm::Value *{
            // Emit private VarDecl with reduction init.
            EmitDecl(*PrivateVD);
            return GetAddrOfLocalVar(PrivateVD);
          });
      assert(IsRegistered && "private var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++ILHS, ++IRHS;
    }
  }
}
250 
251 void CodeGenFunction::EmitOMPReductionClauseFinal(
252     const OMPExecutableDirective &D) {
253   llvm::SmallVector<const Expr *, 8> LHSExprs;
254   llvm::SmallVector<const Expr *, 8> RHSExprs;
255   llvm::SmallVector<const Expr *, 8> ReductionOps;
256   auto ReductionFilter = [](const OMPClause *C) -> bool {
257     return C->getClauseKind() == OMPC_reduction;
258   };
259   bool HasAtLeastOneReduction = false;
260   for (OMPExecutableDirective::filtered_clause_iterator<decltype(
261            ReductionFilter)> I(D.clauses(), ReductionFilter);
262        I; ++I) {
263     HasAtLeastOneReduction = true;
264     auto *C = cast<OMPReductionClause>(*I);
265     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
266     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
267     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
268   }
269   if (HasAtLeastOneReduction) {
270     // Emit nowait reduction if nowait clause is present or directive is a
271     // parallel directive (it always has implicit barrier).
272     CGM.getOpenMPRuntime().emitReduction(
273         *this, D.getLocEnd(), LHSExprs, RHSExprs, ReductionOps,
274         D.getSingleClause(OMPC_nowait) ||
275             isOpenMPParallelDirective(D.getDirectiveKind()));
276   }
277 }
278 
279 /// \brief Emits code for OpenMP parallel directive in the parallel region.
280 static void emitOMPParallelCall(CodeGenFunction &CGF,
281                                 const OMPExecutableDirective &S,
282                                 llvm::Value *OutlinedFn,
283                                 llvm::Value *CapturedStruct) {
284   if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) {
285     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
286     auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
287     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
288                                          /*IgnoreResultAssign*/ true);
289     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
290         CGF, NumThreads, NumThreadsClause->getLocStart());
291   }
292   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
293                                               CapturedStruct);
294 }
295 
296 static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
297                                            const OMPExecutableDirective &S,
298                                            const RegionCodeGenTy &CodeGen) {
299   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
300   auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS);
301   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
302       S, *CS->getCapturedDecl()->param_begin(), CodeGen);
303   if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
304     auto Cond = cast<OMPIfClause>(C)->getCondition();
305     EmitOMPIfClause(CGF, Cond, [&](bool ThenBlock) {
306       if (ThenBlock)
307         emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
308       else
309         CGF.CGM.getOpenMPRuntime().emitSerialCall(CGF, S.getLocStart(),
310                                                   OutlinedFn, CapturedStruct);
311     });
312   } else
313     emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
314 }
315 
316 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
317   LexicalScope Scope(*this, S.getSourceRange());
318   // Emit parallel region as a standalone region.
319   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
320     OMPPrivateScope PrivateScope(CGF);
321     CGF.EmitOMPPrivateClause(S, PrivateScope);
322     CGF.EmitOMPFirstprivateClause(S, PrivateScope);
323     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
324     if (PrivateScope.Privatize())
325       // Emit implicit barrier to synchronize threads and avoid data races.
326       CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
327                                                  OMPD_unknown);
328     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
329     CGF.EmitOMPReductionClauseFinal(S);
330     // Emit implicit barrier at the end of the 'parallel' directive.
331     CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
332                                                OMPD_unknown);
333   };
334   emitCommonOMPParallelDirective(*this, S, CodeGen);
335 }
336 
337 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
338                                       bool SeparateIter) {
339   RunCleanupsScope BodyScope(*this);
340   // Update counters values on current iteration.
341   for (auto I : S.updates()) {
342     EmitIgnoredExpr(I);
343   }
344   // Update the linear variables.
345   for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
346     for (auto U : C->updates()) {
347       EmitIgnoredExpr(U);
348     }
349   }
350 
351   // On a continue in the body, jump to the end.
352   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
353   BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue));
354   // Emit loop body.
355   EmitStmt(S.getBody());
356   // The end (updates/cleanups).
357   EmitBlock(Continue.getBlock());
358   BreakContinueStack.pop_back();
359   if (SeparateIter) {
360     // TODO: Update lastprivates if the SeparateIter flag is true.
361     // This will be implemented in a follow-up OMPLastprivateClause patch, but
362     // result should be still correct without it, as we do not make these
363     // variables private yet.
364   }
365 }
366 
/// \brief Emits the inner iteration loop "while (LoopCond) { Body; IncExpr; }"
/// shared by the OpenMP loop directives.
///
/// \param S               The loop statement; used for PGO region counters.
/// \param RequiresCleanup True when cleanups may run between the loop and its
///                        exit scope, which forces a staging exit block.
/// \param LoopCond        Condition controlling loop continuation.
/// \param IncExpr         Expression advancing the iteration variable.
/// \param BodyGen         Callback that emits the loop body.
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
  auto Cnt = getPGORegionCounter(&S);

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  // Register the loop header with LoopStack so loop metadata attaches to it.
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, Cnt.getCount());
  if (ExitBlock != LoopExit.getBlock()) {
    // Route the exit through the cleanup-staging block.
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  Cnt.beginRegion(Builder);

  // Create a block for the increment; 'continue' in the body targets it.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
412 
413 void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) {
414   auto IC = S.counters().begin();
415   for (auto F : S.finals()) {
416     if (LocalDeclMap.lookup(cast<DeclRefExpr>((*IC))->getDecl())) {
417       EmitIgnoredExpr(F);
418     }
419     ++IC;
420   }
421   // Emit the final values of the linear variables.
422   for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
423     for (auto F : C->finals()) {
424       EmitIgnoredExpr(F);
425     }
426   }
427 }
428 
429 static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM,
430                                  const OMPAlignedClause &Clause) {
431   unsigned ClauseAlignment = 0;
432   if (auto AlignmentExpr = Clause.getAlignment()) {
433     auto AlignmentCI =
434         cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
435     ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
436   }
437   for (auto E : Clause.varlists()) {
438     unsigned Alignment = ClauseAlignment;
439     if (Alignment == 0) {
440       // OpenMP [2.8.1, Description]
441       // If no optional parameter is specified, implementation-defined default
442       // alignments for SIMD instructions on the target platforms are assumed.
443       Alignment = CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
444           E->getType());
445     }
446     assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
447            "alignment is not power of 2");
448     if (Alignment != 0) {
449       llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
450       CGF.EmitAlignmentAssumption(PtrValue, Alignment);
451     }
452   }
453 }
454 
455 static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
456                                     CodeGenFunction::OMPPrivateScope &LoopScope,
457                                     ArrayRef<Expr *> Counters) {
458   for (auto *E : Counters) {
459     auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
460     bool IsRegistered = LoopScope.addPrivate(VD, [&]() -> llvm::Value * {
461       // Emit var without initialization.
462       auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
463       CGF.EmitAutoVarCleanups(VarEmission);
464       return VarEmission.getAllocatedAddress();
465     });
466     assert(IsRegistered && "counter already registered as private");
467     // Silence the warning about unused variable.
468     (void)IsRegistered;
469   }
470 }
471 
472 static void
473 EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
474                       CodeGenFunction::OMPPrivateScope &PrivateScope) {
475   for (auto Clause : OMPExecutableDirective::linear_filter(D.clauses())) {
476     for (auto *E : Clause->varlists()) {
477       auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
478       bool IsRegistered = PrivateScope.addPrivate(VD, [&]()->llvm::Value * {
479         // Emit var without initialization.
480         auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
481         CGF.EmitAutoVarCleanups(VarEmission);
482         return VarEmission.getAllocatedAddress();
483       });
484       assert(IsRegistered && "linear var already registered as private");
485       // Silence the warning about unused variable.
486       (void)IsRegistered;
487     }
488   }
489 }
490 
/// \brief Emits code for the '#pragma omp simd' directive.  The loop is
/// emitted inline (no outlining or fork call); LoopStack attaches the
/// vectorization metadata to the generated loop.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    // Pragma 'simd' code depends on presence of 'lastprivate'.
    // If present, we have to separate last iteration of the loop:
    //
    // if (LastIteration != 0) {
    //   for (IV in 0..LastIteration-1) BODY;
    //   BODY with updates of lastprivate vars;
    //   <Final counter/linear vars updates>;
    // }
    //
    // otherwise (when there's no lastprivate):
    //
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    //

    // Walk clauses and process safelen/lastprivate.
    bool SeparateIter = false;
    CGF.LoopStack.setParallel();
    CGF.LoopStack.setVectorizerEnable(true);
    for (auto C : S.clauses()) {
      switch (C->getClauseKind()) {
      case OMPC_safelen: {
        RValue Len = CGF.EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
                                     AggValueSlot::ignored(), true);
        llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
        CGF.LoopStack.setVectorizerWidth(Val->getZExtValue());
        // In presence of finite 'safelen', it may be unsafe to mark all
        // the memory instructions parallel, because loop-carried
        // dependences of 'safelen' iterations are possible.
        CGF.LoopStack.setParallel(false);
        break;
      }
      case OMPC_aligned:
        EmitOMPAlignedClause(CGF, CGF.CGM, cast<OMPAlignedClause>(*C));
        break;
      case OMPC_lastprivate:
        SeparateIter = true;
        break;
      default:
        // Not handled yet
        ;
      }
    }

    // Emit inits for the linear variables.
    for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
      for (auto Init : C->inits()) {
        auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
        CGF.EmitVarDecl(*D);
      }
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each
    // iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
      if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
        if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
          CGF.EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
          // Emit calculation of the linear step.
          CGF.EmitIgnoredExpr(CS);
        }
    }

    if (SeparateIter) {
      // Emit: if (LastIteration > 0) - begin.
      RegionCounter Cnt = CGF.getPGORegionCounter(&S);
      auto ThenBlock = CGF.createBasicBlock("simd.if.then");
      auto ContBlock = CGF.createBasicBlock("simd.if.end");
      CGF.EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock,
                               Cnt.getCount());
      CGF.EmitBlock(ThenBlock);
      Cnt.beginRegion(CGF.Builder);
      // Emit 'then' code.
      {
        // Privatize loop counters, linear variables, and 'private' clause
        // variables for the duration of the loop.
        OMPPrivateScope LoopScope(CGF);
        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
        EmitPrivateLinearVars(CGF, S, LoopScope);
        CGF.EmitOMPPrivateClause(S, LoopScope);
        (void)LoopScope.Privatize();
        // Loop over 0..LastIteration-1; the last iteration is peeled off and
        // emitted separately below (see the scheme in the comment above).
        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                             S.getCond(/*SeparateIter=*/true), S.getInc(),
                             [&S](CodeGenFunction &CGF) {
                               CGF.EmitOMPLoopBody(S);
                               CGF.EmitStopPoint(&S);
                             });
        CGF.EmitOMPLoopBody(S, /* SeparateIter */ true);
      }
      CGF.EmitOMPSimdFinal(S);
      // Emit: if (LastIteration != 0) - end.
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    } else {
      {
        // Same privatization as above, but the loop covers all iterations.
        OMPPrivateScope LoopScope(CGF);
        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
        EmitPrivateLinearVars(CGF, S, LoopScope);
        CGF.EmitOMPPrivateClause(S, LoopScope);
        (void)LoopScope.Privatize();
        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                             S.getCond(/*SeparateIter=*/false), S.getInc(),
                             [&S](CodeGenFunction &CGF) {
                               CGF.EmitOMPLoopBody(S);
                               CGF.EmitStopPoint(&S);
                             });
      }
      CGF.EmitOMPSimdFinal(S);
    }
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
}
619 
620 void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
621                                           const OMPLoopDirective &S,
622                                           OMPPrivateScope &LoopScope,
623                                           llvm::Value *LB, llvm::Value *UB,
624                                           llvm::Value *ST, llvm::Value *IL,
625                                           llvm::Value *Chunk) {
626   auto &RT = CGM.getOpenMPRuntime();
627 
628   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
629   const bool Dynamic = RT.isDynamic(ScheduleKind);
630 
631   assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
632          "static non-chunked schedule does not need outer loop");
633 
634   // Emit outer loop.
635   //
636   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
637   // When schedule(dynamic,chunk_size) is specified, the iterations are
638   // distributed to threads in the team in chunks as the threads request them.
639   // Each thread executes a chunk of iterations, then requests another chunk,
640   // until no chunks remain to be distributed. Each chunk contains chunk_size
641   // iterations, except for the last chunk to be distributed, which may have
642   // fewer iterations. When no chunk_size is specified, it defaults to 1.
643   //
644   // When schedule(guided,chunk_size) is specified, the iterations are assigned
645   // to threads in the team in chunks as the executing threads request them.
646   // Each thread executes a chunk of iterations, then requests another chunk,
647   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
648   // each chunk is proportional to the number of unassigned iterations divided
649   // by the number of threads in the team, decreasing to 1. For a chunk_size
650   // with value k (greater than 1), the size of each chunk is determined in the
651   // same way, with the restriction that the chunks do not contain fewer than k
652   // iterations (except for the last chunk to be assigned, which may have fewer
653   // than k iterations).
654   //
655   // When schedule(auto) is specified, the decision regarding scheduling is
656   // delegated to the compiler and/or runtime system. The programmer gives the
657   // implementation the freedom to choose any possible mapping of iterations to
658   // threads in the team.
659   //
660   // When schedule(runtime) is specified, the decision regarding scheduling is
661   // deferred until run time, and the schedule and chunk size are taken from the
662   // run-sched-var ICV. If the ICV is set to auto, the schedule is
663   // implementation defined
664   //
665   // while(__kmpc_dispatch_next(&LB, &UB)) {
666   //   idx = LB;
667   //   while (idx <= UB) { BODY; ++idx; } // inner loop
668   // }
669   //
670   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
671   // When schedule(static, chunk_size) is specified, iterations are divided into
672   // chunks of size chunk_size, and the chunks are assigned to the threads in
673   // the team in a round-robin fashion in the order of the thread number.
674   //
675   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
676   //   while (idx <= UB) { BODY; ++idx; } // inner loop
677   //   LB = LB + ST;
678   //   UB = UB + ST;
679   // }
680   //
681 
682   const Expr *IVExpr = S.getIterationVariable();
683   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
684   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
685 
686   RT.emitForInit(
687       *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
688       (Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST,
689       Chunk);
690 
691   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
692 
693   // Start the loop with a block that tests the condition.
694   auto CondBlock = createBasicBlock("omp.dispatch.cond");
695   EmitBlock(CondBlock);
696   LoopStack.push(CondBlock);
697 
698   llvm::Value *BoolCondVal = nullptr;
699   if (!Dynamic) {
700     // UB = min(UB, GlobalUB)
701     EmitIgnoredExpr(S.getEnsureUpperBound());
702     // IV = LB
703     EmitIgnoredExpr(S.getInit());
704     // IV < UB
705     BoolCondVal = EvaluateExprAsBool(S.getCond(false));
706   } else {
707     BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
708                                     IL, LB, UB, ST);
709   }
710 
711   // If there are any cleanups between here and the loop-exit scope,
712   // create a block to stage a loop exit along.
713   auto ExitBlock = LoopExit.getBlock();
714   if (LoopScope.requiresCleanups())
715     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
716 
717   auto LoopBody = createBasicBlock("omp.dispatch.body");
718   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
719   if (ExitBlock != LoopExit.getBlock()) {
720     EmitBlock(ExitBlock);
721     EmitBranchThroughCleanup(LoopExit);
722   }
723   EmitBlock(LoopBody);
724 
725   // Emit "IV = LB" (in case of static schedule, we have already calculated new
726   // LB for loop condition and emitted it above).
727   if (Dynamic)
728     EmitIgnoredExpr(S.getInit());
729 
730   // Create a block for the increment.
731   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
732   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
733 
734   EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
735                    S.getCond(/*SeparateIter=*/false), S.getInc(),
736                    [&S](CodeGenFunction &CGF) {
737                      CGF.EmitOMPLoopBody(S);
738                      CGF.EmitStopPoint(&S);
739                    });
740 
741   EmitBlock(Continue.getBlock());
742   BreakContinueStack.pop_back();
743   if (!Dynamic) {
744     // Emit "LB = LB + Stride", "UB = UB + Stride".
745     EmitIgnoredExpr(S.getNextLowerBound());
746     EmitIgnoredExpr(S.getNextUpperBound());
747   }
748 
749   EmitBranch(CondBlock);
750   LoopStack.pop();
751   // Emit the fall-through block.
752   EmitBlock(LoopExit.getBlock());
753 
754   // Tell the runtime we are done.
755   // FIXME: Also call fini for ordered loops with dynamic scheduling.
756   if (!Dynamic)
757     RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
758 }
759 
760 /// \brief Emit a helper variable and return corresponding lvalue.
761 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
762                                const DeclRefExpr *Helper) {
763   auto VDecl = cast<VarDecl>(Helper->getDecl());
764   CGF.EmitVarDecl(*VDecl);
765   return CGF.EmitLValue(Helper);
766 }
767 
/// \brief Emit code for an OpenMP worksharing loop: set up the iteration
/// variable and runtime helper variables, detect the schedule, and emit
/// either the static non-chunked fast path or the generic outer dispatch
/// loop.
void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Check pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("omp.precond.then");
    auto ContBlock = createBasicBlock("omp.precond.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      // Emit helper vars inits: lower bound, upper bound, stride and the
      // is-last-iteration flag consumed by the runtime calls below.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      // Privatize the loop counters before emitting the loop body.
      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      auto ScheduleKind = OMPC_SCHEDULE_unknown;
      llvm::Value *Chunk = nullptr;
      if (auto C = cast_or_null<OMPScheduleClause>(
              S.getSingleClause(OMPC_schedule))) {
        ScheduleKind = C->getScheduleKind();
        if (auto Ch = C->getChunkSize()) {
          // Convert the chunk expression to the iteration variable's type.
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk is
        // distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                       IL.getAddress(), LB.getAddress(), UB.getAddress(),
                       ST.getAddress());
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                         S.getCond(/*SeparateIter=*/false), S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S);
                           CGF.EmitStopPoint(&S);
                         });
        // Tell the runtime we are done.
        RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
                            UB.getAddress(), ST.getAddress(), IL.getAddress(),
                            Chunk);
      }
    }
    // We're now done with the loop, so jump to the continuation block.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  }
}
860 
861 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
862   LexicalScope Scope(*this, S.getSourceRange());
863   auto &&CodeGen =
864       [&S](CodeGenFunction &CGF) { CGF.EmitOMPWorksharingLoop(S); };
865   CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
866 
867   // Emit an implicit barrier at the end.
868   if (!S.getSingleClause(OMPC_nowait)) {
869     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
870   }
871 }
872 
// Stub: codegen for '#pragma omp for simd' is not implemented yet; reaching
// this function indicates a compiler bug.
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
}
876 
877 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
878                                 const Twine &Name,
879                                 llvm::Value *Init = nullptr) {
880   auto LVal = CGF.MakeNaturalAlignAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
881   if (Init)
882     CGF.EmitScalarInit(Init, LVal);
883   return LVal;
884 }
885 
/// \brief Emit code for '#pragma omp sections'. With two or more sections,
/// lower the construct as a static-scheduled loop over section indices whose
/// body is a switch dispatching to each section statement; with a single
/// section, lower it as a 'single' region instead.
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  if (CS && CS->size() > 1) {
    auto &&CodeGen = [&S, CS](CodeGenFunction &CGF) {
      auto &C = CGF.CGM.getContext();
      auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
      // Emit helper vars inits.
      LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                    CGF.Builder.getInt32(0));
      // The global upper bound is the index of the last section.
      auto *GlobalUBVal = CGF.Builder.getInt32(CS->size() - 1);
      LValue UB =
          createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
      LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                    CGF.Builder.getInt32(1));
      LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                    CGF.Builder.getInt32(0));
      // Loop counter.
      LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
      // Build opaque references to IV and UB so AST-level condition and
      // increment expressions can be synthesized over the emitted lvalues.
      OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
      OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
      OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
      OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
      // Generate condition for loop.
      BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                          OK_Ordinary, S.getLocStart(),
                          /*fpContractable=*/false);
      // Increment for loop counter.
      UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue,
                        OK_Ordinary, S.getLocStart());
      auto BodyGen = [CS, &S, &IV](CodeGenFunction &CGF) {
        // Iterate through all sections and emit a switch construct:
        // switch (IV) {
        //   case 0:
        //     <SectionStmt[0]>;
        //     break;
        // ...
        //   case <NumSection> - 1:
        //     <SectionStmt[<NumSection> - 1]>;
        //     break;
        // }
        // .omp.sections.exit:
        auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
        auto *SwitchStmt = CGF.Builder.CreateSwitch(
            CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
            CS->size());
        unsigned CaseNumber = 0;
        for (auto C = CS->children(); C; ++C, ++CaseNumber) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(*C);
          CGF.EmitBranch(ExitBB);
        }
        CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
      };
      // Emit static non-chunked loop.
      CGF.CGM.getOpenMPRuntime().emitForInit(
          CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
          /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
          ST.getAddress());
      // UB = min(UB, GlobalUB);
      auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
      auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
          CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
      CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
      // IV = LB;
      CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
      // while (idx <= UB) { BODY; ++idx; }
      CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen);
      // Tell the runtime we are done.
      CGF.CGM.getOpenMPRuntime().emitForFinish(CGF, S.getLocStart(),
                                               OMPC_SCHEDULE_static);
    };

    CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
  } else {
    // If only one section is found - no need to generate a loop, emit it as a
    // 'single' region.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      CGF.EnsureInsertPoint();
    };
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                            llvm::None, llvm::None, llvm::None,
                                            llvm::None);
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait)) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getLocStart(),
        (CS && CS->size() > 1) ? OMPD_sections : OMPD_single);
  }
}
984 
985 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
986   LexicalScope Scope(*this, S.getSourceRange());
987   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
988     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
989     CGF.EnsureInsertPoint();
990   };
991   CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
992 }
993 
994 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
995   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
996   llvm::SmallVector<const Expr *, 8> SrcExprs;
997   llvm::SmallVector<const Expr *, 8> DstExprs;
998   llvm::SmallVector<const Expr *, 8> AssignmentOps;
999   // Check if there are any 'copyprivate' clauses associated with this
1000   // 'single'
1001   // construct.
1002   auto CopyprivateFilter = [](const OMPClause *C) -> bool {
1003     return C->getClauseKind() == OMPC_copyprivate;
1004   };
1005   // Build a list of copyprivate variables along with helper expressions
1006   // (<source>, <destination>, <destination>=<source> expressions)
1007   typedef OMPExecutableDirective::filtered_clause_iterator<decltype(
1008       CopyprivateFilter)> CopyprivateIter;
1009   for (CopyprivateIter I(S.clauses(), CopyprivateFilter); I; ++I) {
1010     auto *C = cast<OMPCopyprivateClause>(*I);
1011     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
1012     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
1013     DstExprs.append(C->destination_exprs().begin(),
1014                     C->destination_exprs().end());
1015     AssignmentOps.append(C->assignment_ops().begin(),
1016                          C->assignment_ops().end());
1017   }
1018   LexicalScope Scope(*this, S.getSourceRange());
1019   // Emit code for 'single' region along with 'copyprivate' clauses
1020   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1021     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1022     CGF.EnsureInsertPoint();
1023   };
1024   CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
1025                                           CopyprivateVars, SrcExprs, DstExprs,
1026                                           AssignmentOps);
1027   // Emit an implicit barrier at the end.
1028   if (!S.getSingleClause(OMPC_nowait)) {
1029     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_single);
1030   }
1031 }
1032 
1033 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
1034   LexicalScope Scope(*this, S.getSourceRange());
1035   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1036     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1037     CGF.EnsureInsertPoint();
1038   };
1039   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
1040 }
1041 
1042 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
1043   LexicalScope Scope(*this, S.getSourceRange());
1044   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1045     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1046     CGF.EnsureInsertPoint();
1047   };
1048   CGM.getOpenMPRuntime().emitCriticalRegion(
1049       *this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart());
1050 }
1051 
// Stub: codegen for '#pragma omp parallel for' is not implemented yet;
// reaching this function indicates a compiler bug.
void
CodeGenFunction::EmitOMPParallelForDirective(const OMPParallelForDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for' is not supported yet.");
}
1056 
// Stub: codegen for '#pragma omp parallel for simd' is not implemented yet;
// reaching this function indicates a compiler bug.
void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet.");
}
1061 
// Stub: codegen for '#pragma omp parallel sections' is not implemented yet;
// reaching this function indicates a compiler bug.
void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet.");
}
1066 
/// \brief Emit code for '#pragma omp task': outline the captured statement
/// into a task entry function and emit the runtime call that creates and
/// schedules the task, passing its tied/untied and 'final' state.
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  LexicalScope Scope(*this, S.getSourceRange());
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  auto &&CodeGen = [PartId, &S](CodeGenFunction &CGF) {
    if (*PartId) {
      // TODO: emit code for untied tasks.
    }
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  auto OutlinedFn =
      CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, CodeGen);
  // Check if we should emit tied or untied task.
  bool Tied = !S.getSingleClause(OMPC_untied);
  // Check if the task is final: the pointer/int pair holds either a folded
  // constant (int part) or the emitted condition value (pointer part).
  llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
  if (auto *Clause = S.getSingleClause(OMPC_final)) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = cast<OMPFinalClause>(Clause)->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Final.setInt(CondConstant);
    else
      Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Final.setInt(/*IntVal=*/false);
  }
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final,
                                      OutlinedFn, SharedsTy, CapturedStruct);
}
1105 
1106 void CodeGenFunction::EmitOMPTaskyieldDirective(
1107     const OMPTaskyieldDirective &S) {
1108   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
1109 }
1110 
1111 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
1112   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
1113 }
1114 
// Stub: codegen for '#pragma omp taskwait' is not implemented yet; reaching
// this function indicates a compiler bug.
void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) {
  llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet.");
}
1118 
1119 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
1120   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
1121     if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) {
1122       auto FlushClause = cast<OMPFlushClause>(C);
1123       return llvm::makeArrayRef(FlushClause->varlist_begin(),
1124                                 FlushClause->varlist_end());
1125     }
1126     return llvm::None;
1127   }(), S.getLocStart());
1128 }
1129 
// Stub: codegen for '#pragma omp ordered' is not implemented yet; reaching
// this function indicates a compiler bug.
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) {
  llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
}
1133 
1134 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
1135                                          QualType SrcType, QualType DestType) {
1136   assert(CGF.hasScalarEvaluationKind(DestType) &&
1137          "DestType must have scalar evaluation kind.");
1138   assert(!Val.isAggregate() && "Must be a scalar or complex.");
1139   return Val.isScalar()
1140              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
1141              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
1142                                                  DestType);
1143 }
1144 
1145 static CodeGenFunction::ComplexPairTy
1146 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
1147                       QualType DestType) {
1148   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
1149          "DestType must have complex evaluation kind.");
1150   CodeGenFunction::ComplexPairTy ComplexVal;
1151   if (Val.isScalar()) {
1152     // Convert the input element to the element type of the complex.
1153     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
1154     auto ScalarVal =
1155         CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
1156     ComplexVal = CodeGenFunction::ComplexPairTy(
1157         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
1158   } else {
1159     assert(Val.isComplex() && "Must be a scalar or complex.");
1160     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
1161     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
1162     ComplexVal.first = CGF.EmitScalarConversion(
1163         Val.getComplexVal().first, SrcElementType, DestElementType);
1164     ComplexVal.second = CGF.EmitScalarConversion(
1165         Val.getComplexVal().second, SrcElementType, DestElementType);
1166   }
1167   return ComplexVal;
1168 }
1169 
/// \brief Emit 'v = x;' for '#pragma omp atomic read': load 'x' atomically
/// (plain load for global register lvalues) and store the result into 'v'.
static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  // Global register lvalues cannot be loaded atomically; use a plain load.
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(XLValue, Loc,
                                        IsSeqCst ? llvm::SequentiallyConsistent
                                                 : llvm::Monotonic,
                                        XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  // Store into 'v', converting between scalar and complex representations
  // when the types of 'x' and 'v' differ.
  switch (CGF.getEvaluationKind(V->getType())) {
  case TEK_Scalar:
    CGF.EmitStoreOfScalar(
        convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue);
    break;
  case TEK_Complex:
    CGF.EmitStoreOfComplex(
        convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}
1204 
1205 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
1206                                    const Expr *X, const Expr *E,
1207                                    SourceLocation Loc) {
1208   // x = expr;
1209   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
1210   LValue XLValue = CGF.EmitLValue(X);
1211   RValue ExprRValue = CGF.EmitAnyExpr(E);
1212   if (XLValue.isGlobalReg())
1213     CGF.EmitStoreThroughGlobalRegLValue(ExprRValue, XLValue);
1214   else
1215     CGF.EmitAtomicStore(ExprRValue, XLValue,
1216                         IsSeqCst ? llvm::SequentiallyConsistent
1217                                  : llvm::Monotonic,
1218                         XLValue.isVolatile(), /*IsInit=*/false);
1219   // OpenMP, 2.12.6, atomic Construct
1220   // Any atomic construct with a seq_cst clause forces the atomically
1221   // performed operation to include an implicit flush operation without a
1222   // list.
1223   if (IsSeqCst)
1224     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
1225 }
1226 
1227 bool emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, RValue Update,
1228                       BinaryOperatorKind BO, llvm::AtomicOrdering AO,
1229                       bool IsXLHSInRHSPart) {
1230   auto &Context = CGF.CGM.getContext();
1231   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
1232   // expression is simple and atomic is allowed for the given type for the
1233   // target platform.
1234   if (BO == BO_Comma || !Update.isScalar() ||
1235       !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
1236       (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
1237        (Update.getScalarVal()->getType() !=
1238         X.getAddress()->getType()->getPointerElementType())) ||
1239       !Context.getTargetInfo().hasBuiltinAtomic(
1240           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
1241     return false;
1242 
1243   llvm::AtomicRMWInst::BinOp RMWOp;
1244   switch (BO) {
1245   case BO_Add:
1246     RMWOp = llvm::AtomicRMWInst::Add;
1247     break;
1248   case BO_Sub:
1249     if (!IsXLHSInRHSPart)
1250       return false;
1251     RMWOp = llvm::AtomicRMWInst::Sub;
1252     break;
1253   case BO_And:
1254     RMWOp = llvm::AtomicRMWInst::And;
1255     break;
1256   case BO_Or:
1257     RMWOp = llvm::AtomicRMWInst::Or;
1258     break;
1259   case BO_Xor:
1260     RMWOp = llvm::AtomicRMWInst::Xor;
1261     break;
1262   case BO_LT:
1263     RMWOp = X.getType()->hasSignedIntegerRepresentation()
1264                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
1265                                    : llvm::AtomicRMWInst::Max)
1266                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
1267                                    : llvm::AtomicRMWInst::UMax);
1268     break;
1269   case BO_GT:
1270     RMWOp = X.getType()->hasSignedIntegerRepresentation()
1271                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
1272                                    : llvm::AtomicRMWInst::Min)
1273                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
1274                                    : llvm::AtomicRMWInst::UMin);
1275     break;
1276   case BO_Mul:
1277   case BO_Div:
1278   case BO_Rem:
1279   case BO_Shl:
1280   case BO_Shr:
1281   case BO_LAnd:
1282   case BO_LOr:
1283     return false;
1284   case BO_PtrMemD:
1285   case BO_PtrMemI:
1286   case BO_LE:
1287   case BO_GE:
1288   case BO_EQ:
1289   case BO_NE:
1290   case BO_Assign:
1291   case BO_AddAssign:
1292   case BO_SubAssign:
1293   case BO_AndAssign:
1294   case BO_OrAssign:
1295   case BO_XorAssign:
1296   case BO_MulAssign:
1297   case BO_DivAssign:
1298   case BO_RemAssign:
1299   case BO_ShlAssign:
1300   case BO_ShrAssign:
1301   case BO_Comma:
1302     llvm_unreachable("Unsupported atomic update operation");
1303   }
1304   auto *UpdateVal = Update.getScalarVal();
1305   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
1306     UpdateVal = CGF.Builder.CreateIntCast(
1307         IC, X.getAddress()->getType()->getPointerElementType(),
1308         X.getType()->hasSignedIntegerRepresentation());
1309   }
1310   CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
1311   return true;
1312 }
1313 
1314 void CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
1315     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
1316     llvm::AtomicOrdering AO, SourceLocation Loc,
1317     const llvm::function_ref<RValue(RValue)> &CommonGen) {
1318   // Update expressions are allowed to have the following forms:
1319   // x binop= expr; -> xrval + expr;
1320   // x++, ++x -> xrval + 1;
1321   // x--, --x -> xrval - 1;
1322   // x = x binop expr; -> xrval binop expr
1323   // x = expr Op x; - > expr binop xrval;
1324   if (!emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart)) {
1325     if (X.isGlobalReg()) {
1326       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
1327       // 'xrval'.
1328       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
1329     } else {
1330       // Perform compare-and-swap procedure.
1331       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
1332     }
1333   }
1334 }
1335 
/// \brief Emit '#pragma omp atomic update': evaluate 'expr', then apply the
/// update expression \p UE to 'x' atomically, remapping the opaque operands
/// Sema left in \p UE to the freshly loaded value of 'x' and to 'expr'.
static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  // Which opaque operand of UE stands for 'x' depends on whether 'x' was on
  // the left or the right of the original operator.
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  // Gen computes the new value of 'x' from its current value by evaluating
  // UE with the opaque operands bound to the actual rvalues.
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
        return CGF.EmitAnyExpr(UE);
      };
  CGF.EmitOMPAtomicSimpleUpdateExpr(XLValue, ExprRValue, BOUE->getOpcode(),
                                    IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
1372 
/// \brief Dispatch emission of an 'omp atomic' construct to the handler for
/// its operation clause \p Kind (read/write/update; no clause defaults to
/// update). All other clause kinds are rejected by Sema and unreachable here.
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, const Expr *X, const Expr *V,
                              const Expr *E, const Expr *UE,
                              bool IsXLHSInRHSPart, SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet.");
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_mergeable:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
1417 
1418 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
1419   bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
1420   OpenMPClauseKind Kind = OMPC_unknown;
1421   for (auto *C : S.clauses()) {
1422     // Find first clause (skip seq_cst clause, if it is first).
1423     if (C->getClauseKind() != OMPC_seq_cst) {
1424       Kind = C->getClauseKind();
1425       break;
1426     }
1427   }
1428 
1429   const auto *CS =
1430       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
1431   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS))
1432     enterFullExpression(EWC);
1433 
1434   LexicalScope Scope(*this, S.getSourceRange());
1435   auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) {
1436     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
1437                       S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart());
1438   };
1439   CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
1440 }
1441 
// Stub: codegen for '#pragma omp target' is not implemented yet; reaching
// this function indicates a compiler bug.
void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
  llvm_unreachable("CodeGen for 'omp target' is not supported yet.");
}
1445 
// Stub: codegen for '#pragma omp teams' is not implemented yet; reaching
// this function indicates a compiler bug.
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
}
1449 
1450