1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CodeGenModule.h"
17 #include "TargetInfo.h"
18 #include "clang/AST/Stmt.h"
19 #include "clang/AST/StmtOpenMP.h"
20 using namespace clang;
21 using namespace CodeGen;
22 
23 //===----------------------------------------------------------------------===//
24 //                              OpenMP Directive Emission
25 //===----------------------------------------------------------------------===//
26 
27 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
28 /// function. Here is the logic:
29 /// if (Cond) {
30 ///   CodeGen(true);
31 /// } else {
32 ///   CodeGen(false);
33 /// }
34 static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
35                             const std::function<void(bool)> &CodeGen) {
36   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
37 
38   // If the condition constant folds and can be elided, try to avoid emitting
39   // the condition and the dead arm of the if/else.
40   bool CondConstant;
41   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
42     CodeGen(CondConstant);
43     return;
44   }
45 
46   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
47   // emit the conditional branch.
48   auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then");
49   auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else");
50   auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end");
51   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0);
52 
53   // Emit the 'then' code.
54   CGF.EmitBlock(ThenBlock);
55   CodeGen(/*ThenBlock*/ true);
56   CGF.EmitBranch(ContBlock);
57   // Emit the 'else' code if present.
58   {
59     // There is no need to emit line number for unconditional branch.
60     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
61     CGF.EmitBlock(ElseBlock);
62   }
63   CodeGen(/*ThenBlock*/ false);
64   {
65     // There is no need to emit line number for unconditional branch.
66     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
67     CGF.EmitBranch(ContBlock);
68   }
69   // Emit the continuation block for code after the if.
70   CGF.EmitBlock(ContBlock, /*IsFinished*/ true);
71 }
72 
/// \brief Emits initialization of an aggregate (array) private copy from the
/// original variable, either as a plain memcpy or element-by-element.
///
/// \param OriginalAddr LValue of the original (shared) array.
/// \param PrivateAddr Address of the freshly allocated private copy.
/// \param AssignExpr Expression used to initialize a single element.
/// \param OriginalType Type of the original variable; expected to be an array
/// type on the element-by-element path (getAsArrayTypeUnsafe below).
/// \param VDInit Pseudo-declaration standing for the current source element
/// inside \a AssignExpr; it is temporarily mapped into LocalDeclMap while each
/// per-element initialization is emitted.
void CodeGenFunction::EmitOMPAggregateAssign(LValue OriginalAddr,
                                             llvm::Value *PrivateAddr,
                                             const Expr *AssignExpr,
                                             QualType OriginalType,
                                             const VarDecl *VDInit) {
  EmitBlock(createBasicBlock(".omp.assign.begin."));
  if (!isa<CXXConstructExpr>(AssignExpr) || isTrivialInitializer(AssignExpr)) {
    // Perform simple memcpy: no constructor needs to run per element.
    EmitAggregateAssign(PrivateAddr, OriginalAddr.getAddress(),
                        AssignExpr->getType());
  } else {
    // Perform element-by-element initialization.
    QualType ElementTy;
    auto SrcBegin = OriginalAddr.getAddress();
    auto DestBegin = PrivateAddr;
    auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
    auto SrcNumElements = emitArrayLength(ArrayTy, ElementTy, SrcBegin);
    auto DestNumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
    auto SrcEnd = Builder.CreateGEP(SrcBegin, SrcNumElements);
    auto DestEnd = Builder.CreateGEP(DestBegin, DestNumElements);
    // The basic structure here is a do-while loop, because we don't
    // need to check for the zero-element case. Note the loop walks the
    // arrays backwards: the PHIs start at the one-past-the-end pointers
    // and each iteration steps back by one element.
    auto BodyBB = createBasicBlock("omp.arraycpy.body");
    auto DoneBB = createBasicBlock("omp.arraycpy.done");
    auto IsEmpty =
        Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
    Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

    // Enter the loop body, making that address the current address.
    auto EntryBB = Builder.GetInsertBlock();
    EmitBlock(BodyBB);
    // PHIs carry the "one past the current element" pointers; the back-edge
    // incoming values are completed below, after the body is emitted.
    auto SrcElementPast = Builder.CreatePHI(SrcBegin->getType(), 2,
                                            "omp.arraycpy.srcElementPast");
    SrcElementPast->addIncoming(SrcEnd, EntryBB);
    auto DestElementPast = Builder.CreatePHI(DestBegin->getType(), 2,
                                             "omp.arraycpy.destElementPast");
    DestElementPast->addIncoming(DestEnd, EntryBB);

    // Shift the address back by one element.
    auto NegativeOne = llvm::ConstantInt::get(SizeTy, -1, true);
    auto DestElement = Builder.CreateGEP(DestElementPast, NegativeOne,
                                         "omp.arraycpy.dest.element");
    auto SrcElement = Builder.CreateGEP(SrcElementPast, NegativeOne,
                                        "omp.arraycpy.src.element");
    {
      // Create RunCleanScope to cleanup possible temps.
      CodeGenFunction::RunCleanupsScope Init(*this);
      // Emit initialization for single element. VDInit is temporarily bound
      // to the current source element so AssignExpr can reference it.
      LocalDeclMap[VDInit] = SrcElement;
      EmitAnyExprToMem(AssignExpr, DestElement,
                       AssignExpr->getType().getQualifiers(),
                       /*IsInitializer*/ false);
      LocalDeclMap.erase(VDInit);
    }

    // Check whether we've reached the end.
    auto Done =
        Builder.CreateICmpEQ(DestElement, DestBegin, "omp.arraycpy.done");
    Builder.CreateCondBr(Done, DoneBB, BodyBB);
    // Close the PHI back-edges now that the decremented pointers exist.
    DestElementPast->addIncoming(DestElement, Builder.GetInsertBlock());
    SrcElementPast->addIncoming(SrcElement, Builder.GetInsertBlock());

    // Done.
    EmitBlock(DoneBB, true);
  }
  EmitBlock(createBasicBlock(".omp.assign.end."));
}
140 
141 void CodeGenFunction::EmitOMPFirstprivateClause(
142     const OMPExecutableDirective &D,
143     CodeGenFunction::OMPPrivateScope &PrivateScope) {
144   auto PrivateFilter = [](const OMPClause *C) -> bool {
145     return C->getClauseKind() == OMPC_firstprivate;
146   };
147   for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
148            I(D.clauses(), PrivateFilter); I; ++I) {
149     auto *C = cast<OMPFirstprivateClause>(*I);
150     auto IRef = C->varlist_begin();
151     auto InitsRef = C->inits().begin();
152     for (auto IInit : C->private_copies()) {
153       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
154       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
155       bool IsRegistered;
156       if (*InitsRef != nullptr) {
157         // Emit VarDecl with copy init for arrays.
158         auto *FD = CapturedStmtInfo->lookup(OrigVD);
159         LValue Base = MakeNaturalAlignAddrLValue(
160             CapturedStmtInfo->getContextValue(),
161             getContext().getTagDeclType(FD->getParent()));
162         auto OriginalAddr = EmitLValueForField(Base, FD);
163         auto VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
164         IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
165           auto Emission = EmitAutoVarAlloca(*VD);
166           // Emit initialization of aggregate firstprivate vars.
167           EmitOMPAggregateAssign(OriginalAddr, Emission.getAllocatedAddress(),
168                                  VD->getInit(), (*IRef)->getType(), VDInit);
169           EmitAutoVarCleanups(Emission);
170           return Emission.getAllocatedAddress();
171         });
172       } else
173         IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
174           // Emit private VarDecl with copy init.
175           EmitDecl(*VD);
176           return GetAddrOfLocalVar(VD);
177         });
178       assert(IsRegistered && "counter already registered as private");
179       // Silence the warning about unused variable.
180       (void)IsRegistered;
181       ++IRef, ++InitsRef;
182     }
183   }
184 }
185 
186 void CodeGenFunction::EmitOMPPrivateClause(
187     const OMPExecutableDirective &D,
188     CodeGenFunction::OMPPrivateScope &PrivateScope) {
189   auto PrivateFilter = [](const OMPClause *C) -> bool {
190     return C->getClauseKind() == OMPC_private;
191   };
192   for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
193            I(D.clauses(), PrivateFilter); I; ++I) {
194     auto *C = cast<OMPPrivateClause>(*I);
195     auto IRef = C->varlist_begin();
196     for (auto IInit : C->private_copies()) {
197       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
198       auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
199       bool IsRegistered =
200           PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
201             // Emit private VarDecl with copy init.
202             EmitDecl(*VD);
203             return GetAddrOfLocalVar(VD);
204           });
205       assert(IsRegistered && "counter already registered as private");
206       // Silence the warning about unused variable.
207       (void)IsRegistered;
208       ++IRef;
209     }
210   }
211 }
212 
213 /// \brief Emits code for OpenMP parallel directive in the parallel region.
214 static void EmitOMPParallelCall(CodeGenFunction &CGF,
215                                 const OMPParallelDirective &S,
216                                 llvm::Value *OutlinedFn,
217                                 llvm::Value *CapturedStruct) {
218   if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) {
219     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
220     auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
221     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
222                                          /*IgnoreResultAssign*/ true);
223     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
224         CGF, NumThreads, NumThreadsClause->getLocStart());
225   }
226   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
227                                               CapturedStruct);
228 }
229 
230 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
231   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
232   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
233   auto OutlinedFn = CGM.getOpenMPRuntime().emitOutlinedFunction(
234       S, *CS->getCapturedDecl()->param_begin());
235   if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
236     auto Cond = cast<OMPIfClause>(C)->getCondition();
237     EmitOMPIfClause(*this, Cond, [&](bool ThenBlock) {
238       if (ThenBlock)
239         EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
240       else
241         CGM.getOpenMPRuntime().emitSerialCall(*this, S.getLocStart(),
242                                               OutlinedFn, CapturedStruct);
243     });
244   } else
245     EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
246 }
247 
248 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
249                                       bool SeparateIter) {
250   RunCleanupsScope BodyScope(*this);
251   // Update counters values on current iteration.
252   for (auto I : S.updates()) {
253     EmitIgnoredExpr(I);
254   }
255   // On a continue in the body, jump to the end.
256   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
257   BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue));
258   // Emit loop body.
259   EmitStmt(S.getBody());
260   // The end (updates/cleanups).
261   EmitBlock(Continue.getBlock());
262   BreakContinueStack.pop_back();
263   if (SeparateIter) {
264     // TODO: Update lastprivates if the SeparateIter flag is true.
265     // This will be implemented in a follow-up OMPLastprivateClause patch, but
266     // result should be still correct without it, as we do not make these
267     // variables private yet.
268   }
269 }
270 
/// \brief Emits the inner "while (IV < UB) { BODY; ++IV; }" loop of an
/// OpenMP loop directive, with PGO instrumentation and break/continue
/// destinations set up.
///
/// \param S The loop directive.
/// \param LoopScope Privatization scope for loop counters; if it requires
/// cleanups, the loop exit is staged through an extra cleanup block.
/// \param SeparateIter When true, the condition excludes the last iteration
/// ("IV < LastIteration") so it can be emitted separately by the caller
/// (lastprivate support).
void CodeGenFunction::EmitOMPInnerLoop(const OMPLoopDirective &S,
                                       OMPPrivateScope &LoopScope,
                                       bool SeparateIter) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
  auto Cnt = getPGORegionCounter(&S);

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition: "IV < LastIteration + 1 [ - 1]"
  // ("- 1" when lastprivate clause is present - separate one iteration).
  llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond(SeparateIter));
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock,
                       PGO.createLoopWeights(S.getCond(SeparateIter), Cnt));

  if (ExitBlock != LoopExit.getBlock()) {
    // Route the exit edge through the cleanup block.
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  Cnt.beginRegion(Builder);

  // Create a block for the increment; 'continue' in the body targets it.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  EmitOMPLoopBody(S);
  EmitStopPoint(&S);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(S.getInc());
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
320 
321 void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) {
322   auto IC = S.counters().begin();
323   for (auto F : S.finals()) {
324     if (LocalDeclMap.lookup(cast<DeclRefExpr>((*IC))->getDecl())) {
325       EmitIgnoredExpr(F);
326     }
327     ++IC;
328   }
329 }
330 
331 static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM,
332                                  const OMPAlignedClause &Clause) {
333   unsigned ClauseAlignment = 0;
334   if (auto AlignmentExpr = Clause.getAlignment()) {
335     auto AlignmentCI =
336         cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
337     ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
338   }
339   for (auto E : Clause.varlists()) {
340     unsigned Alignment = ClauseAlignment;
341     if (Alignment == 0) {
342       // OpenMP [2.8.1, Description]
343       // If no optional parameter is specified, implementation-defined default
344       // alignments for SIMD instructions on the target platforms are assumed.
345       Alignment = CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
346           E->getType());
347     }
348     assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
349            "alignment is not power of 2");
350     if (Alignment != 0) {
351       llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
352       CGF.EmitAlignmentAssumption(PtrValue, Alignment);
353     }
354   }
355 }
356 
357 static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
358                                     CodeGenFunction::OMPPrivateScope &LoopScope,
359                                     ArrayRef<Expr *> Counters) {
360   for (auto *E : Counters) {
361     auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
362     bool IsRegistered = LoopScope.addPrivate(VD, [&]() -> llvm::Value * {
363       // Emit var without initialization.
364       auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
365       CGF.EmitAutoVarCleanups(VarEmission);
366       return VarEmission.getAllocatedAddress();
367     });
368     assert(IsRegistered && "counter already registered as private");
369     // Silence the warning about unused variable.
370     (void)IsRegistered;
371   }
372   (void)LoopScope.Privatize();
373 }
374 
/// \brief Emits code for '#pragma omp simd': the loop is emitted inline with
/// vectorization metadata; no runtime calls are involved.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  // Pragma 'simd' code depends on presence of 'lastprivate'.
  // If present, we have to separate last iteration of the loop:
  //
  // if (LastIteration != 0) {
  //   for (IV in 0..LastIteration-1) BODY;
  //   BODY with updates of lastprivate vars;
  //   <Final counter/linear vars updates>;
  // }
  //
  // otherwise (when there's no lastprivate):
  //
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  //

  // Walk clauses and process safelen/lastprivate.
  bool SeparateIter = false;
  LoopStack.setParallel();
  LoopStack.setVectorizerEnable(true);
  for (auto C : S.clauses()) {
    switch (C->getClauseKind()) {
    case OMPC_safelen: {
      // safelen expression must fold to an integer constant (Sema-checked).
      RValue Len = EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
                               AggValueSlot::ignored(), true);
      llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
      LoopStack.setVectorizerWidth(Val->getZExtValue());
      // In presence of finite 'safelen', it may be unsafe to mark all
      // the memory instructions parallel, because loop-carried
      // dependences of 'safelen' iterations are possible.
      LoopStack.setParallel(false);
      break;
    }
    case OMPC_aligned:
      EmitOMPAlignedClause(*this, CGM, cast<OMPAlignedClause>(*C));
      break;
    case OMPC_lastprivate:
      SeparateIter = true;
      break;
    default:
      // Not handled yet
      ;
    }
  }

  InlinedOpenMPRegionRAII Region(*this, S);
  RunCleanupsScope DirectiveScope(*this);

  // Wrap the directive in a lexical block for debug info, if enabled.
  CGDebugInfo *DI = getDebugInfo();
  if (DI)
    DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin());

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  EmitVarDecl(*IVDecl);
  EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  if (SeparateIter) {
    // Emit: if (LastIteration > 0) - begin.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("simd.if.then");
    auto ContBlock = createBasicBlock("simd.if.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      // Loop over all but the last iteration, then the last one separately.
      EmitOMPInnerLoop(S, LoopScope, /* SeparateIter */ true);
      EmitOMPLoopBody(S, /* SeparateIter */ true);
    }
    EmitOMPSimdFinal(S);
    // Emit: if (LastIteration != 0) - end.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  } else {
    {
      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      EmitOMPInnerLoop(S, LoopScope);
    }
    EmitOMPSimdFinal(S);
  }

  if (DI)
    DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
}
473 
/// \brief Emits the outer dispatch loop for worksharing schedules that hand
/// out chunks at run time (e.g. static with a chunk size). Each outer
/// iteration fetches the next [LB..UB] chunk and runs the inner loop on it.
/// NOTE: dynamic schedules are rejected with ErrorUnsupported below.
///
/// \param LB, UB, ST, IL Addresses of the lower/upper bound, stride and
/// "is last iteration" helper variables handed to the runtime.
/// \param Chunk Emitted chunk-size value, or null if none was specified.
void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                          const OMPLoopDirective &S,
                                          OMPPrivateScope &LoopScope,
                                          llvm::Value *LB, llvm::Value *UB,
                                          llvm::Value *ST, llvm::Value *IL,
                                          llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();
  assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
         "static non-chunked schedule does not need outer loop");
  if (RT.isDynamic(ScheduleKind)) {
    ErrorUnsupported(&S, "OpenMP loop with dynamic schedule");
    return;
  }

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the
  // threads in the team in a round-robin fashion in the order of the thread
  // number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  // Ask the runtime to initialize LB/UB/ST/IL for this schedule.
  RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
                 UB, ST, Chunk);
  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  // UB = min(UB, GlobalUB)
  EmitIgnoredExpr(S.getEnsureUpperBound());
  // IV = LB
  EmitIgnoredExpr(S.getInit());
  // IV < UB
  BoolCondVal = EvaluateExprAsBool(S.getCond(false));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Create a block for the increment; 'continue' targets it.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Process the current chunk with the inner loop.
  EmitOMPInnerLoop(S, LoopScope);

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  // Emit "LB = LB + Stride", "UB = UB + Stride".
  EmitIgnoredExpr(S.getNextLowerBound());
  EmitIgnoredExpr(S.getNextUpperBound());

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
}
556 
557 /// \brief Emit a helper variable and return corresponding lvalue.
558 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
559                                const DeclRefExpr *Helper) {
560   auto VDecl = cast<VarDecl>(Helper->getDecl());
561   CGF.EmitVarDecl(*VDecl);
562   return CGF.EmitLValue(Helper);
563 }
564 
/// \brief Emits a worksharing loop (the body of '#pragma omp for'):
/// sets up the iteration variable and helper vars, privatizes counters, then
/// either emits a single static-nonchunked chunk or delegates to the outer
/// dispatch loop for chunked schedules.
void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Check pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("omp.precond.then");
    auto ContBlock = createBasicBlock("omp.precond.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      // Emit helper vars inits: bounds, stride and the last-iteration flag
      // that the runtime reads/writes.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());

      // Detect the loop schedule kind and chunk.
      auto ScheduleKind = OMPC_SCHEDULE_unknown;
      llvm::Value *Chunk = nullptr;
      if (auto C = cast_or_null<OMPScheduleClause>(
              S.getSingleClause(OMPC_schedule))) {
        ScheduleKind = C->getScheduleKind();
        if (auto Ch = C->getChunkSize()) {
          // Convert the chunk to the iteration variable's type.
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk
        // is distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                       IL.getAddress(), LB.getAddress(), UB.getAddress(),
                       ST.getAddress());
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope);
        // Tell the runtime we are done.
        RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
                            UB.getAddress(), ST.getAddress(), IL.getAddress(),
                            Chunk);
      }
    }
    // We're now done with the loop, so jump to the continuation block.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  }
}
651 
652 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
653   InlinedOpenMPRegionRAII Region(*this, S);
654   RunCleanupsScope DirectiveScope(*this);
655 
656   CGDebugInfo *DI = getDebugInfo();
657   if (DI)
658     DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin());
659 
660   EmitOMPWorksharingLoop(S);
661 
662   // Emit an implicit barrier at the end.
663   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
664                                          /*IsExplicit*/ false);
665   if (DI)
666     DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
667 }
668 
// TODO: implement codegen for 'omp for simd'; reaching this stub aborts.
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
}
672 
// TODO: implement codegen for 'omp sections'; reaching this stub aborts.
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &) {
  llvm_unreachable("CodeGen for 'omp sections' is not supported yet.");
}
676 
// TODO: implement codegen for 'omp section'; reaching this stub aborts.
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &) {
  llvm_unreachable("CodeGen for 'omp section' is not supported yet.");
}
680 
681 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
682   CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void {
683     InlinedOpenMPRegionRAII Region(*this, S);
684     RunCleanupsScope Scope(*this);
685     EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
686     EnsureInsertPoint();
687   }, S.getLocStart());
688 }
689 
690 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
691   CGM.getOpenMPRuntime().emitMasterRegion(*this, [&]() -> void {
692     InlinedOpenMPRegionRAII Region(*this, S);
693     RunCleanupsScope Scope(*this);
694     EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
695     EnsureInsertPoint();
696   }, S.getLocStart());
697 }
698 
699 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
700   CGM.getOpenMPRuntime().emitCriticalRegion(
701       *this, S.getDirectiveName().getAsString(), [&]() -> void {
702         InlinedOpenMPRegionRAII Region(*this, S);
703         RunCleanupsScope Scope(*this);
704         EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
705         EnsureInsertPoint();
706       }, S.getLocStart());
707 }
708 
// TODO: implement codegen for 'omp parallel for'; reaching this stub aborts.
void
CodeGenFunction::EmitOMPParallelForDirective(const OMPParallelForDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for' is not supported yet.");
}
713 
// TODO: implement codegen for 'omp parallel for simd'; reaching this stub
// aborts.
void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet.");
}
718 
// TODO: implement codegen for 'omp parallel sections'; reaching this stub
// aborts.
void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet.");
}
723 
// TODO: implement codegen for 'omp task'; reaching this stub aborts.
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &) {
  llvm_unreachable("CodeGen for 'omp task' is not supported yet.");
}
727 
// Lower '#pragma omp taskyield' to the corresponding runtime call.
void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}
732 
// Lower '#pragma omp barrier' to an explicit runtime barrier call.
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart());
}
736 
// TODO: implement codegen for 'omp taskwait'; reaching this stub aborts.
void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) {
  llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet.");
}
740 
741 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
742   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
743     if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) {
744       auto FlushClause = cast<OMPFlushClause>(C);
745       return llvm::makeArrayRef(FlushClause->varlist_begin(),
746                                 FlushClause->varlist_end());
747     }
748     return llvm::None;
749   }(), S.getLocStart());
750 }
751 
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) {
  // '#pragma omp ordered' codegen is not implemented yet.
  llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
}
755 
756 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
757                                          QualType SrcType, QualType DestType) {
758   assert(CGF.hasScalarEvaluationKind(DestType) &&
759          "DestType must have scalar evaluation kind.");
760   assert(!Val.isAggregate() && "Must be a scalar or complex.");
761   return Val.isScalar()
762              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
763              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
764                                                  DestType);
765 }
766 
767 static CodeGenFunction::ComplexPairTy
768 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
769                       QualType DestType) {
770   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
771          "DestType must have complex evaluation kind.");
772   CodeGenFunction::ComplexPairTy ComplexVal;
773   if (Val.isScalar()) {
774     // Convert the input element to the element type of the complex.
775     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
776     auto ScalarVal =
777         CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
778     ComplexVal = CodeGenFunction::ComplexPairTy(
779         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
780   } else {
781     assert(Val.isComplex() && "Must be a scalar or complex.");
782     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
783     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
784     ComplexVal.first = CGF.EmitScalarConversion(
785         Val.getComplexVal().first, SrcElementType, DestElementType);
786     ComplexVal.second = CGF.EmitScalarConversion(
787         Val.getComplexVal().second, SrcElementType, DestElementType);
788   }
789   return ComplexVal;
790 }
791 
792 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
793                                   const Expr *X, const Expr *V,
794                                   SourceLocation Loc) {
795   // v = x;
796   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
797   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
798   LValue XLValue = CGF.EmitLValue(X);
799   LValue VLValue = CGF.EmitLValue(V);
800   RValue Res = XLValue.isGlobalReg()
801                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
802                    : CGF.EmitAtomicLoad(XLValue, Loc,
803                                         IsSeqCst ? llvm::SequentiallyConsistent
804                                                  : llvm::Monotonic,
805                                         XLValue.isVolatile());
806   // OpenMP, 2.12.6, atomic Construct
807   // Any atomic construct with a seq_cst clause forces the atomically
808   // performed operation to include an implicit flush operation without a
809   // list.
810   if (IsSeqCst)
811     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
812   switch (CGF.getEvaluationKind(V->getType())) {
813   case TEK_Scalar:
814     CGF.EmitStoreOfScalar(
815         convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue);
816     break;
817   case TEK_Complex:
818     CGF.EmitStoreOfComplex(
819         convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue,
820         /*isInit=*/false);
821     break;
822   case TEK_Aggregate:
823     llvm_unreachable("Must be a scalar or complex.");
824   }
825 }
826 
827 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
828                                    const Expr *X, const Expr *E,
829                                    SourceLocation Loc) {
830   // x = expr;
831   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
832   LValue XLValue = CGF.EmitLValue(X);
833   RValue ExprRValue = CGF.EmitAnyExpr(E);
834   if (XLValue.isGlobalReg())
835     CGF.EmitStoreThroughGlobalRegLValue(ExprRValue, XLValue);
836   else
837     CGF.EmitAtomicStore(ExprRValue, XLValue,
838                         IsSeqCst ? llvm::SequentiallyConsistent
839                                  : llvm::Monotonic,
840                         XLValue.isVolatile(), /*IsInit=*/false);
841   // OpenMP, 2.12.6, atomic Construct
842   // Any atomic construct with a seq_cst clause forces the atomically
843   // performed operation to include an implicit flush operation without a
844   // list.
845   if (IsSeqCst)
846     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
847 }
848 
/// Dispatch '#pragma omp atomic' codegen on the atomic kind clause
/// (read/write/update/capture). All other clause kinds are listed explicitly
/// so that adding a new OpenMPClauseKind triggers a -Wswitch warning here.
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, const Expr *X, const Expr *V,
                              const Expr *E, SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_update:
  case OMPC_capture:
    // Not implemented yet; only read/write forms are lowered above.
    llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet.");
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_mergeable:
  case OMPC_unknown:
    // Sema rejects these on an atomic directive, so they cannot reach here.
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
890 
891 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
892   bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
893   OpenMPClauseKind Kind = OMPC_unknown;
894   for (auto *C : S.clauses()) {
895     // Find first clause (skip seq_cst clause, if it is first).
896     if (C->getClauseKind() != OMPC_seq_cst) {
897       Kind = C->getClauseKind();
898       break;
899     }
900   }
901   EmitOMPAtomicExpr(*this, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
902                     S.getLocStart());
903 }
904 
void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
  // '#pragma omp target' codegen is not implemented yet.
  llvm_unreachable("CodeGen for 'omp target' is not supported yet.");
}
908 
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
  // '#pragma omp teams' codegen is not implemented yet.
  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
}
912 
913