//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
using namespace clang;
using namespace CodeGen;

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
namespace {
/// \brief RAII for inlined OpenMP regions (like 'omp for', 'omp simd', 'omp
/// critical' etc.). Helps to generate proper debug info and provides correct
/// code generation for such constructs.
class InlinedOpenMPRegionScopeRAII {
  InlinedOpenMPRegionRAII Region;
  CodeGenFunction::LexicalScope DirectiveScope;

public:
  InlinedOpenMPRegionScopeRAII(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D)
      : Region(CGF, D), DirectiveScope(CGF, D.getSourceRange()) {}
};
} // namespace

/// \brief Emits code for the OpenMP 'if' clause using the specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
///   CodeGen(true);
/// } else {
///   CodeGen(false);
/// }
static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                            const std::function<void(bool)> &CodeGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    CodeGen(CondConstant);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then");
  auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else");
  auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  CodeGen(/*ThenBlock*/ true);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  {
    // There is no need to emit a line number for the unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ElseBlock);
  }
  CodeGen(/*ThenBlock*/ false);
  {
    // There is no need to emit a line number for the unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBranch(ContBlock);
  }
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished*/ true);
}

void CodeGenFunction::EmitOMPAggregateAssign(LValue OriginalAddr,
                                             llvm::Value *PrivateAddr,
                                             const Expr *AssignExpr,
                                             QualType OriginalType,
                                             const VarDecl *VDInit) {
  EmitBlock(createBasicBlock(".omp.assign.begin."));
  if (!isa<CXXConstructExpr>(AssignExpr) || isTrivialInitializer(AssignExpr)) {
    // Perform a simple memcpy.
    EmitAggregateAssign(PrivateAddr, OriginalAddr.getAddress(),
                        AssignExpr->getType());
  } else {
    // Perform element-by-element initialization.
    QualType ElementTy;
    auto SrcBegin = OriginalAddr.getAddress();
    auto DestBegin = PrivateAddr;
    auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
    auto SrcNumElements = emitArrayLength(ArrayTy, ElementTy, SrcBegin);
    auto DestNumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
    auto SrcEnd = Builder.CreateGEP(SrcBegin, SrcNumElements);
    auto DestEnd = Builder.CreateGEP(DestBegin, DestNumElements);
    // The basic structure here is a do-while loop, with an up-front emptiness
    // check so that the body is never entered for zero elements.
    auto BodyBB = createBasicBlock("omp.arraycpy.body");
    auto DoneBB = createBasicBlock("omp.arraycpy.done");
    auto IsEmpty =
        Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
    Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

    // Enter the loop body, making that address the current address.
    auto EntryBB = Builder.GetInsertBlock();
    EmitBlock(BodyBB);
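    // The copy runs backwards: the PHIs start at the past-the-end addresses
    // and step down towards the begin pointers, so the termination check can
    // compare against the begin of the destination array.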
    auto SrcElementPast = Builder.CreatePHI(SrcBegin->getType(), 2,
                                            "omp.arraycpy.srcElementPast");
    SrcElementPast->addIncoming(SrcEnd, EntryBB);
    auto DestElementPast = Builder.CreatePHI(DestBegin->getType(), 2,
                                             "omp.arraycpy.destElementPast");
    DestElementPast->addIncoming(DestEnd, EntryBB);

    // Shift the address back by one element.
    auto NegativeOne = llvm::ConstantInt::get(SizeTy, -1, true);
    auto DestElement = Builder.CreateGEP(DestElementPast, NegativeOne,
                                         "omp.arraycpy.dest.element");
    auto SrcElement = Builder.CreateGEP(SrcElementPast, NegativeOne,
                                        "omp.arraycpy.src.element");
    {
      // Create a RunCleanupsScope to clean up any temporaries.
      CodeGenFunction::RunCleanupsScope Init(*this);
      // Emit the initialization for a single element.
      LocalDeclMap[VDInit] = SrcElement;
      EmitAnyExprToMem(AssignExpr, DestElement,
                       AssignExpr->getType().getQualifiers(),
                       /*IsInitializer*/ false);
      LocalDeclMap.erase(VDInit);
    }

    // Check whether we've reached the end.
    auto Done =
        Builder.CreateICmpEQ(DestElement, DestBegin, "omp.arraycpy.done");
    Builder.CreateCondBr(Done, DoneBB, BodyBB);
    DestElementPast->addIncoming(DestElement, Builder.GetInsertBlock());
    SrcElementPast->addIncoming(SrcElement, Builder.GetInsertBlock());

    // Done.
    EmitBlock(DoneBB, true);
  }
  EmitBlock(createBasicBlock(".omp.assign.end."));
}

void CodeGenFunction::EmitOMPFirstprivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_firstprivate;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter); I; ++I) {
    auto *C = cast<OMPFirstprivateClause>(*I);
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
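    // The variable list, the init expressions and the private copies are
    // parallel lists; walk them in lockstep.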
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      bool IsRegistered;
      if (*InitsRef != nullptr) {
        // Emit VarDecl with copy init for arrays.
        auto *FD = CapturedStmtInfo->lookup(OrigVD);
        LValue Base = MakeNaturalAlignAddrLValue(
            CapturedStmtInfo->getContextValue(),
            getContext().getTagDeclType(FD->getParent()));
        auto OriginalAddr = EmitLValueForField(Base, FD);
        auto VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
          auto Emission = EmitAutoVarAlloca(*VD);
          // Emit initialization of aggregate firstprivate vars.
          EmitOMPAggregateAssign(OriginalAddr, Emission.getAllocatedAddress(),
                                 VD->getInit(), (*IRef)->getType(), VDInit);
          EmitAutoVarCleanups(Emission);
          return Emission.getAllocatedAddress();
        });
      } else
        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
      assert(IsRegistered && "firstprivate var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++IRef, ++InitsRef;
    }
  }
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_private;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter); I; ++I) {
    auto *C = cast<OMPPrivateClause>(*I);
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      bool IsRegistered =
          PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
      assert(IsRegistered && "private var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++IRef;
    }
  }
}

/// \brief Emits the code for an OpenMP parallel directive in the parallel
/// region.
static void EmitOMPParallelCall(CodeGenFunction &CGF,
                                const OMPParallelDirective &S,
                                llvm::Value *OutlinedFn,
                                llvm::Value *CapturedStruct) {
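  // If a 'num_threads' clause is present, tell the runtime how many threads
  // to request for the parallel region before emitting the fork.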
  if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedStruct);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto OutlinedFn = CGM.getOpenMPRuntime().emitOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin());
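  // With an 'if' clause, emit both a forked and a serial version of the
  // region and select between them at run time; otherwise always fork.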
  if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
    auto Cond = cast<OMPIfClause>(C)->getCondition();
    EmitOMPIfClause(*this, Cond, [&](bool ThenBlock) {
      if (ThenBlock)
        EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
      else
        CGM.getOpenMPRuntime().emitSerialCall(*this, S.getLocStart(),
                                              OutlinedFn, CapturedStruct);
    });
  } else
    EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
                                      bool SeparateIter) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values for the current iteration.
  for (auto I : S.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    for (auto U : C->updates()) {
      EmitIgnoredExpr(U);
    }
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue));
  // Emit loop body.
  EmitStmt(S.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (SeparateIter) {
    // TODO: Update lastprivates if the SeparateIter flag is true.
    // This will be implemented in a follow-up OMPLastprivateClause patch, but
    // the result should still be correct without it, as we do not make these
    // variables private yet.
  }
}

void CodeGenFunction::EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup,
                                       const Expr *LoopCond,
                                       const Expr *IncExpr,
                                       const std::function<void()> &BodyGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
  auto Cnt = getPGORegionCounter(&S);

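  // The emitted control flow:
  //   omp.inner.for.cond: branch on LoopCond to the body or to the exit
  //   omp.inner.for.body: BodyGen()
  //   omp.inner.for.inc:  emit IncExpr, branch back to the condition
  //   omp.inner.for.end:  fall-through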
  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, Cnt.getCount());
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  Cnt.beginRegion(Builder);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen();

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) {
  auto IC = S.counters().begin();
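  // Emit a counter's final value only if the counter itself was emitted in
  // this function (i.e. it is present in LocalDeclMap).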
  for (auto F : S.finals()) {
    if (LocalDeclMap.lookup(cast<DeclRefExpr>((*IC))->getDecl())) {
      EmitIgnoredExpr(F);
    }
    ++IC;
  }
  // Emit the final values of the linear variables.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    for (auto F : C->finals()) {
      EmitIgnoredExpr(F);
    }
  }
}

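/// \brief Emits an alignment assumption for each pointer listed in the given
/// 'aligned' clause.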
static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM,
                                 const OMPAlignedClause &Clause) {
  unsigned ClauseAlignment = 0;
  if (auto AlignmentExpr = Clause.getAlignment()) {
    auto AlignmentCI =
        cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
    ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
  }
  for (auto E : Clause.varlists()) {
    unsigned Alignment = ClauseAlignment;
    if (Alignment == 0) {
      // OpenMP [2.8.1, Description]
      // If no optional parameter is specified, implementation-defined default
      // alignments for SIMD instructions on the target platforms are assumed.
      Alignment = CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
          E->getType());
    }
    assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
           "alignment is not a power of 2");
    if (Alignment != 0) {
      llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
      CGF.EmitAlignmentAssumption(PtrValue, Alignment);
    }
  }
}

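/// \brief Emits uninitialized private copies for the given loop counters.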
static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters) {
  for (auto *E : Counters) {
    auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    bool IsRegistered = LoopScope.addPrivate(VD, [&]() -> llvm::Value * {
      // Emit var without initialization.
      auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
      CGF.EmitAutoVarCleanups(VarEmission);
      return VarEmission.getAllocatedAddress();
    });
    assert(IsRegistered && "counter already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;
  }
}

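/// \brief Emits uninitialized private copies for the variables of the
/// directive's 'linear' clauses.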
static void
EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
                      CodeGenFunction::OMPPrivateScope &PrivateScope) {
  for (auto Clause : OMPExecutableDirective::linear_filter(D.clauses())) {
    for (auto *E : Clause->varlists()) {
      auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> llvm::Value * {
        // Emit var without initialization.
        auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
        CGF.EmitAutoVarCleanups(VarEmission);
        return VarEmission.getAllocatedAddress();
      });
      assert(IsRegistered && "linear var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
    }
  }
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  // Code generation for the 'simd' pragma depends on the presence of a
  // 'lastprivate' clause. If one is present, we have to separate the last
  // iteration of the loop:
  //
  // if (LastIteration != 0) {
  //   for (IV in 0..LastIteration-1) BODY;
  //   BODY with updates of lastprivate vars;
  //   <Final counter/linear vars updates>;
  // }
  //
  // otherwise (when there is no lastprivate):
  //
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  //

  // Walk clauses and process safelen/lastprivate.
  bool SeparateIter = false;
  LoopStack.setParallel();
  LoopStack.setVectorizerEnable(true);
  for (auto C : S.clauses()) {
    switch (C->getClauseKind()) {
    case OMPC_safelen: {
      RValue Len = EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
                               AggValueSlot::ignored(), /*IgnoreResult=*/true);
      llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
      LoopStack.setVectorizerWidth(Val->getZExtValue());
      // In the presence of a finite 'safelen', it may be unsafe to mark all
      // the memory instructions parallel, because loop-carried
      // dependences of 'safelen' iterations are possible.
      LoopStack.setParallel(false);
      break;
    }
    case OMPC_aligned:
      EmitOMPAlignedClause(*this, CGM, cast<OMPAlignedClause>(*C));
      break;
    case OMPC_lastprivate:
      SeparateIter = true;
      break;
    default:
      // Not handled yet.
      break;
    }
  }

  InlinedOpenMPRegionScopeRAII Region(*this, S);

  // Emit inits for the linear variables.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    for (auto Init : C->inits()) {
      auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      EmitVarDecl(*D);
    }
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  EmitVarDecl(*IVDecl);
  EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iteration count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  // Emit the linear steps for the linear clauses.
  // If a step is not constant, it is pre-calculated before the loop.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }

  if (SeparateIter) {
    // Emit: if (LastIteration != 0) - begin.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("simd.if.then");
    auto ContBlock = createBasicBlock("simd.if.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      EmitPrivateLinearVars(*this, S, LoopScope);
      EmitOMPPrivateClause(S, LoopScope);
      (void)LoopScope.Privatize();
      EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                       S.getCond(/*SeparateIter=*/true), S.getInc(),
                       [&S, this]() {
                         EmitOMPLoopBody(S);
                         EmitStopPoint(&S);
                       });
      EmitOMPLoopBody(S, /*SeparateIter=*/true);
    }
    EmitOMPSimdFinal(S);
    // Emit: if (LastIteration != 0) - end.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  } else {
    {
      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      EmitPrivateLinearVars(*this, S, LoopScope);
      EmitOMPPrivateClause(S, LoopScope);
      (void)LoopScope.Privatize();
      EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                       S.getCond(/*SeparateIter=*/false), S.getInc(),
                       [&S, this]() {
                         EmitOMPLoopBody(S);
                         EmitStopPoint(&S);
                       });
    }
    EmitOMPSimdFinal(S);
  }
}

void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                          const OMPLoopDirective &S,
                                          OMPPrivateScope &LoopScope,
                                          llvm::Value *LB, llvm::Value *UB,
                                          llvm::Value *ST, llvm::Value *IL,
                                          llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool Dynamic = RT.isDynamic(ScheduleKind);

  assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  RT.emitForInit(
      *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
      (Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST,
      Chunk);

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  if (!Dynamic) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond(false));
  } else {
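    // Ask the runtime for the next chunk of iterations; the call returns
    // false once no chunks remain to be dispatched.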
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
                                 IL, LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (Dynamic)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                   S.getCond(/*SeparateIter=*/false), S.getInc(), [&S, this]() {
                     EmitOMPLoopBody(S);
                     EmitStopPoint(&S);
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!Dynamic) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  // FIXME: Also call fini for ordered loops with dynamic scheduling.
  if (!Dynamic)
    RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
}

/// \brief Emits a helper variable and returns the corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iteration count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Check pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("omp.precond.then");
    auto ContBlock = createBasicBlock("omp.precond.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      auto ScheduleKind = OMPC_SCHEDULE_unknown;
      llvm::Value *Chunk = nullptr;
      if (auto C = cast_or_null<OMPScheduleClause>(
              S.getSingleClause(OMPC_schedule))) {
        ScheduleKind = C->getScheduleKind();
        if (auto Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk is
        // distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                       IL.getAddress(), LB.getAddress(), UB.getAddress(),
                       ST.getAddress());
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                         S.getCond(/*SeparateIter=*/false), S.getInc(),
                         [&S, this]() {
                           EmitOMPLoopBody(S);
                           EmitStopPoint(&S);
                         });
        // Tell the runtime we are done.
        RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // the runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
                            UB.getAddress(), ST.getAddress(), IL.getAddress(),
                            Chunk);
      }
    }
    // We're now done with the loop, so jump to the continuation block.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  }
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  InlinedOpenMPRegionScopeRAII Region(*this, S);

  EmitOMPWorksharingLoop(S);

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait)) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
}

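/// \brief Creates a temporary of the given type and name and returns an
/// lvalue for it; if \a Init is non-null, the temporary is initialized
/// with it.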
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeNaturalAlignAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitScalarInit(Init, LVal);
  return LVal;
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  InlinedOpenMPRegionScopeRAII Region(*this, S);

  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  if (CS && CS->size() > 1) {
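    // Lower the sections to a statically scheduled worksharing loop over the
    // section index; each iteration selects one section via a switch.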
    auto &C = CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.lb.",
                                  Builder.getInt32(0));
    auto *GlobalUBVal = Builder.getInt32(CS->size() - 1);
    LValue UB =
        createSectionLVal(*this, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.st.",
                                  Builder.getInt32(1));
    LValue IL = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.il.",
                                  Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.iv.");
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    OpaqueValueMapping OpaqueIV(*this, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    OpaqueValueMapping OpaqueUB(*this, &UBRefExpr, UB);
    // Generate the condition for the loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(), /*fpContractable=*/false);
    // Increment for the loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [this, CS, &S, &IV]() {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt = Builder.CreateSwitch(
          EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS->size());
      unsigned CaseNumber = 0;
      for (auto C = CS->children(); C; ++C, ++CaseNumber) {
        auto CaseBB = createBasicBlock(".omp.sections.case");
        EmitBlock(CaseBB);
        SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
        EmitStmt(*C);
        EmitBranch(ExitBB);
      }
      EmitBlock(ExitBB, /*IsFinished=*/true);
    };
    // Emit static non-chunked loop.
    CGM.getOpenMPRuntime().emitForInit(
        *this, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
        /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
        ST.getAddress());
    // UB = min(UB, GlobalUB);
    auto *UBVal = EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = Builder.CreateSelect(
        Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    EmitStoreOfScalar(EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen);
    // Tell the runtime we are done.
    CGM.getOpenMPRuntime().emitForFinish(*this, S.getLocStart(),
                                         OMPC_SCHEDULE_static);
  } else {
    // If only one section is found, there is no need to generate a loop; emit
    // it as a single region.
    CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void {
      InlinedOpenMPRegionScopeRAII Region(*this, S);
      EmitStmt(Stmt);
      EnsureInsertPoint();
    }, S.getLocStart(), llvm::None, llvm::None, llvm::None, llvm::None);
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait)) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getLocStart(),
        (CS && CS->size() > 1) ? OMPD_sections : OMPD_single);
  }
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  InlinedOpenMPRegionScopeRAII Region(*this, S);
  EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  EnsureInsertPoint();
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> DstExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this 'single'
  // construct.
  auto CopyprivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_copyprivate;
  };
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination>=<source> expressions).
  typedef OMPExecutableDirective::filtered_clause_iterator<decltype(
      CopyprivateFilter)> CopyprivateIter;
  for (CopyprivateIter I(S.clauses(), CopyprivateFilter); I; ++I) {
    auto *C = cast<OMPCopyprivateClause>(*I);
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    DstExprs.append(C->destination_exprs().begin(),
                    C->destination_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void {
    InlinedOpenMPRegionScopeRAII Region(*this, S);
    EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    EnsureInsertPoint();
  }, S.getLocStart(), CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait)) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_single);
  }
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  CGM.getOpenMPRuntime().emitMasterRegion(*this, [&]() -> void {
    InlinedOpenMPRegionScopeRAII Region(*this, S);
    EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    EnsureInsertPoint();
  }, S.getLocStart());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  CGM.getOpenMPRuntime().emitCriticalRegion(
      *this, S.getDirectiveName().getAsString(), [&]() -> void {
        InlinedOpenMPRegionScopeRAII Region(*this, S);
        EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
        EnsureInsertPoint();
      }, S.getLocStart());
}

void
CodeGenFunction::EmitOMPParallelForDirective(const OMPParallelForDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for' is not supported yet.");
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet.");
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet.");
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit the outlined function for the task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto *I = CS->getCapturedDecl()->param_begin();
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied tasks).
  auto OutlinedFn =
      CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, *std::next(I));
  // Check if we should emit a tied or an untied task.
  bool Tied = !S.getSingleClause(OMPC_untied);
  // Check if the task is final.
  llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
  if (auto *Clause = S.getSingleClause(OMPC_final)) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = cast<OMPFinalClause>(Clause)->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Final.setInt(CondConstant);
    else
      Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Final.setInt(/*IntVal=*/false);
  }
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final,
                                      OutlinedFn, SharedsTy, CapturedStruct);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) {
  llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet.");
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
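  // Pass the variable list of the 'flush' clause to the runtime if one is
  // present; otherwise emit a flush without a list.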
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) {
      auto FlushClause = cast<OMPFlushClause>(C);
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) {
  llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
}

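/// \brief Converts the scalar or complex rvalue \a Val from \a SrcType to a
/// scalar value of \a DestType.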
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType);
}

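/// \brief Converts the scalar or complex rvalue \a Val from \a SrcType to a
/// complex value of \a DestType.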
static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal =
        CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType);
  }
  return ComplexVal;
}

static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
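  // A global register lvalue cannot be accessed atomically; fall back to a
  // plain load for it.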
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(XLValue, Loc,
                                        IsSeqCst ? llvm::SequentiallyConsistent
                                                 : llvm::Monotonic,
                                        XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  switch (CGF.getEvaluationKind(V->getType())) {
  case TEK_Scalar:
    CGF.EmitStoreOfScalar(
        convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue);
    break;
  case TEK_Complex:
    CGF.EmitStoreOfComplex(
        convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  if (XLValue.isGlobalReg())
    CGF.EmitStoreThroughGlobalRegLValue(ExprRValue, XLValue);
  else
    CGF.EmitAtomicStore(ExprRValue, XLValue,
                        IsSeqCst ? llvm::SequentiallyConsistent
                                 : llvm::Monotonic,
                        XLValue.isVolatile(), /*IsInit=*/false);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

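/// \brief Returns the atomicrmw operation corresponding to \a Op if the
/// update expression can be lowered to a single atomicrmw instruction for the
/// given operands, or an empty value otherwise.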
static Optional<llvm::AtomicRMWInst::BinOp>
getCompatibleAtomicRMWBinOp(ASTContext &Context, BinaryOperatorKind Op,
                            bool IsXLHSInRHSPart, LValue XLValue,
                            RValue ExprRValue) {
  Optional<llvm::AtomicRMWInst::BinOp> RMWOp;
  // Allow atomicrmw only if 'x' and 'expr' are integer values, the lvalue for
  // the 'x' expression is simple, and atomics are allowed for the given type
  // on the target platform.
  if (ExprRValue.isScalar() &&
      ExprRValue.getScalarVal()->getType()->isIntegerTy() &&
      XLValue.isSimple() &&
      (isa<llvm::ConstantInt>(ExprRValue.getScalarVal()) ||
       (ExprRValue.getScalarVal()->getType() ==
        XLValue.getAddress()->getType()->getPointerElementType())) &&
      Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(XLValue.getType()),
          Context.toBits(XLValue.getAlignment()))) {
    switch (Op) {
    case BO_Add:
      RMWOp = llvm::AtomicRMWInst::Add;
      break;
    case BO_Sub:
      if (IsXLHSInRHSPart) {
        RMWOp = llvm::AtomicRMWInst::Sub;
      }
      break;
    case BO_And:
      RMWOp = llvm::AtomicRMWInst::And;
      break;
    case BO_Or:
      RMWOp = llvm::AtomicRMWInst::Or;
      break;
    case BO_Xor:
      RMWOp = llvm::AtomicRMWInst::Xor;
      break;
    case BO_Mul:
    case BO_Div:
    case BO_Rem:
    case BO_Shl:
    case BO_Shr:
      break;
    case BO_PtrMemD:
    case BO_PtrMemI:
    case BO_LT:
    case BO_GT:
    case BO_LE:
    case BO_GE:
    case BO_EQ:
    case BO_NE:
    case BO_LAnd:
    case BO_LOr:
    case BO_Assign:
    case BO_MulAssign:
    case BO_DivAssign:
    case BO_RemAssign:
    case BO_AddAssign:
    case BO_SubAssign:
    case BO_ShlAssign:
    case BO_ShrAssign:
    case BO_AndAssign:
    case BO_XorAssign:
    case BO_OrAssign:
    case BO_Comma:
      llvm_unreachable("Unexpected binary operation in 'atomic update'.");
    }
  }
  return RMWOp;
}

static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  const auto &Op =
      getCompatibleAtomicRMWBinOp(CGF.CGM.getContext(), BOUE->getOpcode(),
                                  IsXLHSInRHSPart, XLValue, ExprRValue);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
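  // Lowering strategy: prefer a single atomicrmw when the update maps onto
  // one; otherwise use a plain load/store for global register lvalues, or an
  // atomic compare-and-exchange loop for everything else.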
  if (Op) {
    auto *ExprVal = ExprRValue.getScalarVal();
    if (auto *IC = dyn_cast<llvm::ConstantInt>(ExprVal)) {
      ExprVal = CGF.Builder.CreateIntCast(
          IC, XLValue.getAddress()->getType()->getPointerElementType(),
          XLValue.getType()->hasSignedIntegerRepresentation());
    }
    CGF.Builder.CreateAtomicRMW(*Op, XLValue.getAddress(), ExprVal, AO);
  } else {
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    CodeGenFunction::OpaqueValueMapping MapExpr(
        CGF, IsXLHSInRHSPart ? RHS : LHS, ExprRValue);
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    if (XLValue.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      CodeGenFunction::OpaqueValueMapping MapX(
          CGF, XRValExpr, CGF.EmitLoadOfLValue(XLValue, Loc));
      CGF.EmitStoreThroughLValue(CGF.EmitAnyExpr(UE), XLValue);
    } else {
      // Perform the compare-and-swap procedure.
      CGF.EmitAtomicUpdate(
          XLValue, AO, [&CGF, &UE, &XRValExpr](RValue XRVal) -> RValue {
            CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRVal);
            return CGF.EmitAnyExpr(UE);
          }, /*IsVolatile=*/false);
    }
  }
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, const Expr *X, const Expr *V,
                              const Expr *E, const Expr *UE,
                              bool IsXLHSInRHSPart, SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    llvm_unreachable("CodeGen for 'omp atomic capture' is not supported yet.");
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_mergeable:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find the first clause (skip the seq_cst clause if it comes first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS))
    enterFullExpression(EWC);
  InlinedOpenMPRegionScopeRAII Region(*this, S);

  EmitOMPAtomicExpr(*this, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
                    S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart());
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
  llvm_unreachable("CodeGen for 'omp target' is not supported yet.");
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
}