1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CodeGenModule.h"
17 #include "TargetInfo.h"
18 #include "clang/AST/Stmt.h"
19 #include "clang/AST/StmtOpenMP.h"
20 using namespace clang;
21 using namespace CodeGen;
22 
23 namespace {
24 /// \brief RAII for emitting code of CapturedStmt without function outlining.
25 class InlinedOpenMPRegion {
26   CodeGenFunction &CGF;
27   CodeGenFunction::CGCapturedStmtInfo *PrevCapturedStmtInfo;
28   const Decl *StoredCurCodeDecl;
29 
30   /// \brief A class to emit CapturedStmt construct as inlined statement without
31   /// generating a function for outlined code.
32   class CGInlinedOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
33   public:
34     CGInlinedOpenMPRegionInfo() : CGCapturedStmtInfo() {}
35   };
36 
37 public:
38   InlinedOpenMPRegion(CodeGenFunction &CGF, const Stmt *S)
39       : CGF(CGF), PrevCapturedStmtInfo(CGF.CapturedStmtInfo),
40         StoredCurCodeDecl(CGF.CurCodeDecl) {
41     CGF.CurCodeDecl = cast<CapturedStmt>(S)->getCapturedDecl();
42     CGF.CapturedStmtInfo = new CGInlinedOpenMPRegionInfo();
43   }
44   ~InlinedOpenMPRegion() {
45     delete CGF.CapturedStmtInfo;
46     CGF.CapturedStmtInfo = PrevCapturedStmtInfo;
47     CGF.CurCodeDecl = StoredCurCodeDecl;
48   }
49 };
50 } // namespace
51 
52 //===----------------------------------------------------------------------===//
53 //                              OpenMP Directive Emission
54 //===----------------------------------------------------------------------===//
55 
56 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
57 /// function. Here is the logic:
58 /// if (Cond) {
59 ///   CodeGen(true);
60 /// } else {
61 ///   CodeGen(false);
62 /// }
63 static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
64                             const std::function<void(bool)> &CodeGen) {
65   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
66 
67   // If the condition constant folds and can be elided, try to avoid emitting
68   // the condition and the dead arm of the if/else.
69   bool CondConstant;
70   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
71     CodeGen(CondConstant);
72     return;
73   }
74 
75   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
76   // emit the conditional branch.
77   auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then");
78   auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else");
79   auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end");
80   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0);
81 
82   // Emit the 'then' code.
83   CGF.EmitBlock(ThenBlock);
84   CodeGen(/*ThenBlock*/ true);
85   CGF.EmitBranch(ContBlock);
86   // Emit the 'else' code if present.
87   {
88     // There is no need to emit line number for unconditional branch.
89     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
90     CGF.EmitBlock(ElseBlock);
91   }
92   CodeGen(/*ThenBlock*/ false);
93   {
94     // There is no need to emit line number for unconditional branch.
95     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
96     CGF.EmitBranch(ContBlock);
97   }
98   // Emit the continuation block for code after the if.
99   CGF.EmitBlock(ContBlock, /*IsFinished*/ true);
100 }
101 
/// \brief Copies/initializes an aggregate from \a OriginalAddr into
/// \a PrivateAddr.
///
/// Trivial initializers become a plain aggregate copy. Otherwise an
/// element-by-element loop is emitted that walks the array backwards from the
/// one-past-the-end pointers, evaluating \a AssignExpr for each destination
/// element with \a VDInit temporarily bound to the matching source element.
///
/// \param OriginalAddr lvalue of the original (shared) variable.
/// \param PrivateAddr  address of the private copy being initialized.
/// \param AssignExpr   expression performing the (per-element) initialization.
/// \param OriginalType type of the original variable, used to get array bounds.
/// \param VDInit       placeholder VarDecl through which \a AssignExpr reads
///                     the source element; temporarily mapped in LocalDeclMap.
void CodeGenFunction::EmitOMPAggregateAssign(LValue OriginalAddr,
                                             llvm::Value *PrivateAddr,
                                             const Expr *AssignExpr,
                                             QualType OriginalType,
                                             const VarDecl *VDInit) {
  EmitBlock(createBasicBlock(".omp.assign.begin."));
  if (!isa<CXXConstructExpr>(AssignExpr) || isTrivialInitializer(AssignExpr)) {
    // Perform simple memcpy.
    EmitAggregateAssign(PrivateAddr, OriginalAddr.getAddress(),
                        AssignExpr->getType());
  } else {
    // Perform element-by-element initialization.
    QualType ElementTy;
    auto SrcBegin = OriginalAddr.getAddress();
    auto DestBegin = PrivateAddr;
    auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
    auto SrcNumElements = emitArrayLength(ArrayTy, ElementTy, SrcBegin);
    auto DestNumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
    auto SrcEnd = Builder.CreateGEP(SrcBegin, SrcNumElements);
    auto DestEnd = Builder.CreateGEP(DestBegin, DestNumElements);
    // The basic structure here is a do-while loop, because we don't
    // need to check for the zero-element case.
    auto BodyBB = createBasicBlock("omp.arraycpy.body");
    auto DoneBB = createBasicBlock("omp.arraycpy.done");
    auto IsEmpty =
        Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
    Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

    // Enter the loop body, making that address the current address.
    auto EntryBB = Builder.GetInsertBlock();
    EmitBlock(BodyBB);
    // The PHIs hold "one past the element currently processed"; they start at
    // the end pointers and move toward the beginning (back-edge incoming
    // values are added after the loop body below).
    auto SrcElementPast = Builder.CreatePHI(SrcBegin->getType(), 2,
                                            "omp.arraycpy.srcElementPast");
    SrcElementPast->addIncoming(SrcEnd, EntryBB);
    auto DestElementPast = Builder.CreatePHI(DestBegin->getType(), 2,
                                             "omp.arraycpy.destElementPast");
    DestElementPast->addIncoming(DestEnd, EntryBB);

    // Shift the address back by one element.
    auto NegativeOne = llvm::ConstantInt::get(SizeTy, -1, true);
    auto DestElement = Builder.CreateGEP(DestElementPast, NegativeOne,
                                         "omp.arraycpy.dest.element");
    auto SrcElement = Builder.CreateGEP(SrcElementPast, NegativeOne,
                                        "omp.arraycpy.src.element");
    {
      // Create RunCleanScope to cleanup possible temps.
      CodeGenFunction::RunCleanupsScope Init(*this);
      // Emit initialization for single element. VDInit is mapped to the
      // current source element only for the duration of this evaluation.
      LocalDeclMap[VDInit] = SrcElement;
      EmitAnyExprToMem(AssignExpr, DestElement,
                       AssignExpr->getType().getQualifiers(),
                       /*IsInitializer*/ false);
      LocalDeclMap.erase(VDInit);
    }

    // Check whether we've reached the end.
    auto Done =
        Builder.CreateICmpEQ(DestElement, DestBegin, "omp.arraycpy.done");
    Builder.CreateCondBr(Done, DoneBB, BodyBB);
    DestElementPast->addIncoming(DestElement, Builder.GetInsertBlock());
    SrcElementPast->addIncoming(SrcElement, Builder.GetInsertBlock());

    // Done.
    EmitBlock(DoneBB, true);
  }
  EmitBlock(createBasicBlock(".omp.assign.end."));
}
169 
/// \brief Emits private copies for all variables in 'firstprivate' clauses of
/// \a D and registers their addresses in \a PrivateScope.
///
/// Variables with a non-null entry in the clause's inits() list (arrays) are
/// copy-initialized element-wise from the captured original via
/// EmitOMPAggregateAssign; all others are emitted as ordinary private
/// VarDecls whose initializer performs the copy.
void CodeGenFunction::EmitOMPFirstprivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_firstprivate;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter); I; ++I) {
    auto *C = cast<OMPFirstprivateClause>(*I);
    // Walk the original variables, private copies and init placeholders in
    // lock-step (IRef/InitsRef are advanced at the bottom of the loop).
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      bool IsRegistered;
      if (*InitsRef != nullptr) {
        // Emit VarDecl with copy init for arrays.
        // Address the original through its field in the captured struct.
        auto *FD = CapturedStmtInfo->lookup(OrigVD);
        LValue Base = MakeNaturalAlignAddrLValue(
            CapturedStmtInfo->getContextValue(),
            getContext().getTagDeclType(FD->getParent()));
        auto OriginalAddr = EmitLValueForField(Base, FD);
        auto VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
          auto Emission = EmitAutoVarAlloca(*VD);
          // Emit initialization of aggregate firstprivate vars.
          EmitOMPAggregateAssign(OriginalAddr, Emission.getAllocatedAddress(),
                                 VD->getInit(), (*IRef)->getType(), VDInit);
          EmitAutoVarCleanups(Emission);
          return Emission.getAllocatedAddress();
        });
      } else
        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
      assert(IsRegistered && "counter already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++IRef, ++InitsRef;
    }
  }
}
214 
215 void CodeGenFunction::EmitOMPPrivateClause(
216     const OMPExecutableDirective &D,
217     CodeGenFunction::OMPPrivateScope &PrivateScope) {
218   auto PrivateFilter = [](const OMPClause *C) -> bool {
219     return C->getClauseKind() == OMPC_private;
220   };
221   for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
222            I(D.clauses(), PrivateFilter); I; ++I) {
223     auto *C = cast<OMPPrivateClause>(*I);
224     auto IRef = C->varlist_begin();
225     for (auto IInit : C->private_copies()) {
226       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
227       auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
228       bool IsRegistered =
229           PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
230             // Emit private VarDecl with copy init.
231             EmitDecl(*VD);
232             return GetAddrOfLocalVar(VD);
233           });
234       assert(IsRegistered && "counter already registered as private");
235       // Silence the warning about unused variable.
236       (void)IsRegistered;
237       ++IRef;
238     }
239   }
240 }
241 
242 /// \brief Emits code for OpenMP parallel directive in the parallel region.
243 static void EmitOMPParallelCall(CodeGenFunction &CGF,
244                                 const OMPParallelDirective &S,
245                                 llvm::Value *OutlinedFn,
246                                 llvm::Value *CapturedStruct) {
247   if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) {
248     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
249     auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
250     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
251                                          /*IgnoreResultAssign*/ true);
252     CGF.CGM.getOpenMPRuntime().EmitOMPNumThreadsClause(
253         CGF, NumThreads, NumThreadsClause->getLocStart());
254   }
255   CGF.CGM.getOpenMPRuntime().EmitOMPParallelCall(CGF, S.getLocStart(),
256                                                  OutlinedFn, CapturedStruct);
257 }
258 
259 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
260   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
261   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
262   auto OutlinedFn = CGM.getOpenMPRuntime().EmitOpenMPOutlinedFunction(
263       S, *CS->getCapturedDecl()->param_begin());
264   if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
265     auto Cond = cast<OMPIfClause>(C)->getCondition();
266     EmitOMPIfClause(*this, Cond, [&](bool ThenBlock) {
267       if (ThenBlock)
268         EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
269       else
270         CGM.getOpenMPRuntime().EmitOMPSerialCall(*this, S.getLocStart(),
271                                                  OutlinedFn, CapturedStruct);
272     });
273   } else
274     EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
275 }
276 
277 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
278                                       bool SeparateIter) {
279   RunCleanupsScope BodyScope(*this);
280   // Update counters values on current iteration.
281   for (auto I : S.updates()) {
282     EmitIgnoredExpr(I);
283   }
284   // On a continue in the body, jump to the end.
285   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
286   BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue));
287   // Emit loop body.
288   EmitStmt(S.getBody());
289   // The end (updates/cleanups).
290   EmitBlock(Continue.getBlock());
291   BreakContinueStack.pop_back();
292   if (SeparateIter) {
293     // TODO: Update lastprivates if the SeparateIter flag is true.
294     // This will be implemented in a follow-up OMPLastprivateClause patch, but
295     // result should be still correct without it, as we do not make these
296     // variables private yet.
297   }
298 }
299 
/// \brief Emits the inner loop of an OpenMP loop directive:
/// "while (IV < UB) { BODY; IV += 1; }", including PGO instrumentation and
/// the break/continue destinations for the body.
///
/// \param LoopScope privatized loop variables; if it requires cleanups, an
///        extra exit block is staged so cleanups run on loop exit.
/// \param SeparateIter when true the condition leaves the last iteration to
///        be emitted separately by the caller (lastprivate peeling).
void CodeGenFunction::EmitOMPInnerLoop(const OMPLoopDirective &S,
                                       OMPPrivateScope &LoopScope,
                                       bool SeparateIter) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
  auto Cnt = getPGORegionCounter(&S);

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition: "IV < LastIteration + 1 [ - 1]"
  // ("- 1" when lastprivate clause is present - separate one iteration).
  llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond(SeparateIter));
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock,
                       PGO.createLoopWeights(S.getCond(SeparateIter), Cnt));

  if (ExitBlock != LoopExit.getBlock()) {
    // Run pending cleanups on the way out of the loop.
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  Cnt.beginRegion(Builder);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  EmitOMPLoopBody(S);
  EmitStopPoint(&S);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(S.getInc());
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
349 
350 void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) {
351   auto IC = S.counters().begin();
352   for (auto F : S.finals()) {
353     if (LocalDeclMap.lookup(cast<DeclRefExpr>((*IC))->getDecl())) {
354       EmitIgnoredExpr(F);
355     }
356     ++IC;
357   }
358 }
359 
360 static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM,
361                                  const OMPAlignedClause &Clause) {
362   unsigned ClauseAlignment = 0;
363   if (auto AlignmentExpr = Clause.getAlignment()) {
364     auto AlignmentCI =
365         cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
366     ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
367   }
368   for (auto E : Clause.varlists()) {
369     unsigned Alignment = ClauseAlignment;
370     if (Alignment == 0) {
371       // OpenMP [2.8.1, Description]
372       // If no optional parameter is specified, implementation-defined default
373       // alignments for SIMD instructions on the target platforms are assumed.
374       Alignment = CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
375           E->getType());
376     }
377     assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
378            "alignment is not power of 2");
379     if (Alignment != 0) {
380       llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
381       CGF.EmitAlignmentAssumption(PtrValue, Alignment);
382     }
383   }
384 }
385 
386 static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
387                                     CodeGenFunction::OMPPrivateScope &LoopScope,
388                                     ArrayRef<Expr *> Counters) {
389   for (auto *E : Counters) {
390     auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
391     bool IsRegistered = LoopScope.addPrivate(VD, [&]() -> llvm::Value * {
392       // Emit var without initialization.
393       auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
394       CGF.EmitAutoVarCleanups(VarEmission);
395       return VarEmission.getAllocatedAddress();
396     });
397     assert(IsRegistered && "counter already registered as private");
398     // Silence the warning about unused variable.
399     (void)IsRegistered;
400   }
401   (void)LoopScope.Privatize();
402 }
403 
/// \brief Emits code for '#pragma omp simd': the inner loop annotated with
/// llvm.loop vectorization metadata, with the last iteration peeled off when
/// a 'lastprivate' clause is present.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  // Pragma 'simd' code depends on presence of 'lastprivate'.
  // If present, we have to separate last iteration of the loop:
  //
  // if (LastIteration != 0) {
  //   for (IV in 0..LastIteration-1) BODY;
  //   BODY with updates of lastprivate vars;
  //   <Final counter/linear vars updates>;
  // }
  //
  // otherwise (when there's no lastprivate):
  //
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  //

  // Walk clauses and process safelen/lastprivate.
  bool SeparateIter = false;
  LoopStack.setParallel();
  LoopStack.setVectorizerEnable(true);
  for (auto C : S.clauses()) {
    switch (C->getClauseKind()) {
    case OMPC_safelen: {
      RValue Len = EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
                               AggValueSlot::ignored(), true);
      llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
      LoopStack.setVectorizerWidth(Val->getZExtValue());
      // In presence of finite 'safelen', it may be unsafe to mark all
      // the memory instructions parallel, because loop-carried
      // dependences of 'safelen' iterations are possible.
      LoopStack.setParallel(false);
      break;
    }
    case OMPC_aligned:
      EmitOMPAlignedClause(*this, CGM, cast<OMPAlignedClause>(*C));
      break;
    case OMPC_lastprivate:
      SeparateIter = true;
      break;
    default:
      // Not handled yet
      ;
    }
  }

  // Emit the associated statement inline rather than outlining it.
  InlinedOpenMPRegion Region(*this, S.getAssociatedStmt());
  RunCleanupsScope DirectiveScope(*this);

  CGDebugInfo *DI = getDebugInfo();
  if (DI)
    DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin());

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  EmitVarDecl(*IVDecl);
  EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  if (SeparateIter) {
    // Emit: if (LastIteration > 0) - begin.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("simd.if.then");
    auto ContBlock = createBasicBlock("simd.if.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      // Loop over all but the last iteration, then emit the peeled last
      // iteration's body separately.
      EmitOMPInnerLoop(S, LoopScope, /* SeparateIter */ true);
      EmitOMPLoopBody(S, /* SeparateIter */ true);
    }
    EmitOMPSimdFinal(S);
    // Emit: if (LastIteration != 0) - end.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  } else {
    {
      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      EmitOMPInnerLoop(S, LoopScope);
    }
    EmitOMPSimdFinal(S);
  }

  if (DI)
    DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
}
502 
/// \brief Emits the outer "dispatch" loop for schedules that request work
/// chunks [LB..UB] from the runtime (e.g. static chunked). Dynamic schedules
/// are reported as unsupported. The static non-chunked case is handled by the
/// caller without an outer loop.
///
/// \param LB,UB,ST,IL addresses of the runtime-managed lower-bound,
///        upper-bound, stride and last-iteration-flag helper variables.
/// \param Chunk emitted chunk-size value, or null if none was specified.
void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                          const OMPLoopDirective &S,
                                          OMPPrivateScope &LoopScope,
                                          llvm::Value *LB, llvm::Value *UB,
                                          llvm::Value *ST, llvm::Value *IL,
                                          llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();
  assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
         "static non-chunked schedule does not need outer loop");
  if (RT.isDynamic(ScheduleKind)) {
    ErrorUnsupported(&S, "OpenMP loop with dynamic schedule");
    return;
  }

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  // Tell the runtime to initialize LB/UB/ST/IL for this schedule.
  RT.EmitOMPForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL,
                    LB, UB, ST, Chunk);
  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  // UB = min(UB, GlobalUB)
  EmitIgnoredExpr(S.getEnsureUpperBound());
  // IV = LB
  EmitIgnoredExpr(S.getInit());
  // IV < UB
  BoolCondVal = EvaluateExprAsBool(S.getCond(false));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Process the current chunk with the inner loop.
  EmitOMPInnerLoop(S, LoopScope);

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  // Emit "LB = LB + Stride", "UB = UB + Stride".
  EmitIgnoredExpr(S.getNextLowerBound());
  EmitIgnoredExpr(S.getNextUpperBound());

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  RT.EmitOMPForFinish(*this, S.getLocStart(), ScheduleKind);
}
585 
586 /// \brief Emit a helper variable and return corresponding lvalue.
587 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
588                                const DeclRefExpr *Helper) {
589   auto VDecl = cast<VarDecl>(Helper->getDecl());
590   CGF.EmitVarDecl(*VDecl);
591   return CGF.EmitLValue(Helper);
592 }
593 
/// \brief Emits a worksharing loop (the body of '#pragma omp for'): helper
/// variables, the precondition check, then either the static non-chunked fast
/// path (no outer loop) or the dispatch outer loop for other schedules.
void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Check pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("omp.precond.then");
    auto ContBlock = createBasicBlock("omp.precond.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());

      // Detect the loop schedule kind and chunk.
      auto ScheduleKind = OMPC_SCHEDULE_unknown;
      llvm::Value *Chunk = nullptr;
      if (auto C = cast_or_null<OMPScheduleClause>(
              S.getSingleClause(OMPC_schedule))) {
        ScheduleKind = C->getScheduleKind();
        if (auto Ch = C->getChunkSize()) {
          // The chunk expression is converted to the iteration variable type.
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk is
        // distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        RT.EmitOMPForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                          IL.getAddress(), LB.getAddress(), UB.getAddress(),
                          ST.getAddress());
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope);
        // Tell the runtime we are done.
        RT.EmitOMPForFinish(*this, S.getLocStart(), ScheduleKind);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
                            UB.getAddress(), ST.getAddress(), IL.getAddress(),
                            Chunk);
      }
    }
    // We're now done with the loop, so jump to the continuation block.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  }
}
680 
681 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
682   InlinedOpenMPRegion Region(*this, S.getAssociatedStmt());
683   RunCleanupsScope DirectiveScope(*this);
684 
685   CGDebugInfo *DI = getDebugInfo();
686   if (DI)
687     DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin());
688 
689   EmitOMPWorksharingLoop(S);
690 
691   // Emit an implicit barrier at the end.
692   CGM.getOpenMPRuntime().EmitOMPBarrierCall(*this, S.getLocStart(),
693                                             /*IsExplicit*/ false);
694   if (DI)
695     DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
696 }
697 
// CodeGen for this directive is not implemented yet; reaching here is a bug.
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
}
701 
// CodeGen for this directive is not implemented yet; reaching here is a bug.
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &) {
  llvm_unreachable("CodeGen for 'omp sections' is not supported yet.");
}
705 
// CodeGen for this directive is not implemented yet; reaching here is a bug.
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &) {
  llvm_unreachable("CodeGen for 'omp section' is not supported yet.");
}
709 
710 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
711   CGM.getOpenMPRuntime().EmitOMPSingleRegion(*this, [&]() -> void {
712     InlinedOpenMPRegion Region(*this, S.getAssociatedStmt());
713     RunCleanupsScope Scope(*this);
714     EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
715     EnsureInsertPoint();
716   }, S.getLocStart());
717 }
718 
719 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
720   CGM.getOpenMPRuntime().EmitOMPMasterRegion(*this, [&]() -> void {
721     InlinedOpenMPRegion Region(*this, S.getAssociatedStmt());
722     RunCleanupsScope Scope(*this);
723     EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
724     EnsureInsertPoint();
725   }, S.getLocStart());
726 }
727 
728 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
729   CGM.getOpenMPRuntime().EmitOMPCriticalRegion(
730       *this, S.getDirectiveName().getAsString(), [&]() -> void {
731     InlinedOpenMPRegion Region(*this, S.getAssociatedStmt());
732     RunCleanupsScope Scope(*this);
733     EmitStmt(
734         cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
735     EnsureInsertPoint();
736   }, S.getLocStart());
737 }
738 
// CodeGen for this directive is not implemented yet; reaching here is a bug.
void
CodeGenFunction::EmitOMPParallelForDirective(const OMPParallelForDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for' is not supported yet.");
}
743 
/// \brief Placeholder for '#pragma omp parallel for simd'. Lowering is not
/// implemented yet.
void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet.");
}
748 
/// \brief Placeholder for '#pragma omp parallel sections'. Lowering is not
/// implemented yet.
void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet.");
}
753 
/// \brief Placeholder for '#pragma omp task'. Lowering is not implemented
/// yet.
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &) {
  llvm_unreachable("CodeGen for 'omp task' is not supported yet.");
}
757 
/// \brief Emit '#pragma omp taskyield' by delegating to the OpenMP runtime,
/// which emits the corresponding runtime call at the directive's location.
void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().EmitOMPTaskyieldCall(*this, S.getLocStart());
}
762 
/// \brief Emit '#pragma omp barrier' by delegating to the OpenMP runtime,
/// which emits the corresponding runtime call at the directive's location.
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().EmitOMPBarrierCall(*this, S.getLocStart());
}
766 
/// \brief Placeholder for '#pragma omp taskwait'. Lowering is not implemented
/// yet.
void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) {
  llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet.");
}
770 
771 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
772   CGM.getOpenMPRuntime().EmitOMPFlush(
773       *this, [&]() -> ArrayRef<const Expr *> {
774                if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) {
775                  auto FlushClause = cast<OMPFlushClause>(C);
776                  return llvm::makeArrayRef(FlushClause->varlist_begin(),
777                                            FlushClause->varlist_end());
778                }
779                return llvm::None;
780              }(),
781       S.getLocStart());
782 }
783 
/// \brief Placeholder for '#pragma omp ordered'. Lowering is not implemented
/// yet.
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) {
  llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
}
787 
788 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
789                                          QualType SrcType, QualType DestType) {
790   assert(CGF.hasScalarEvaluationKind(DestType) &&
791          "DestType must have scalar evaluation kind.");
792   assert(!Val.isAggregate() && "Must be a scalar or complex.");
793   return Val.isScalar()
794              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
795              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
796                                                  DestType);
797 }
798 
799 static CodeGenFunction::ComplexPairTy
800 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
801                       QualType DestType) {
802   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
803          "DestType must have complex evaluation kind.");
804   CodeGenFunction::ComplexPairTy ComplexVal;
805   if (Val.isScalar()) {
806     // Convert the input element to the element type of the complex.
807     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
808     auto ScalarVal =
809         CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
810     ComplexVal = CodeGenFunction::ComplexPairTy(
811         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
812   } else {
813     assert(Val.isComplex() && "Must be a scalar or complex.");
814     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
815     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
816     ComplexVal.first = CGF.EmitScalarConversion(
817         Val.getComplexVal().first, SrcElementType, DestElementType);
818     ComplexVal.second = CGF.EmitScalarConversion(
819         Val.getComplexVal().second, SrcElementType, DestElementType);
820   }
821   return ComplexVal;
822 }
823 
824 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
825                                   const Expr *X, const Expr *V,
826                                   SourceLocation Loc) {
827   // v = x;
828   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
829   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
830   LValue XLValue = CGF.EmitLValue(X);
831   LValue VLValue = CGF.EmitLValue(V);
832   RValue Res = XLValue.isGlobalReg() ? CGF.EmitLoadOfLValue(XLValue, Loc)
833                                      : CGF.EmitAtomicLoad(XLValue, Loc);
834   // OpenMP, 2.12.6, atomic Construct
835   // Any atomic construct with a seq_cst clause forces the atomically
836   // performed operation to include an implicit flush operation without a
837   // list.
838   if (IsSeqCst)
839     CGF.CGM.getOpenMPRuntime().EmitOMPFlush(CGF, llvm::None, Loc);
840   switch (CGF.getEvaluationKind(V->getType())) {
841   case TEK_Scalar:
842     CGF.EmitStoreOfScalar(
843         convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue);
844     break;
845   case TEK_Complex:
846     CGF.EmitStoreOfComplex(
847         convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue,
848         /*isInit=*/false);
849     break;
850   case TEK_Aggregate:
851     llvm_unreachable("Must be a scalar or complex.");
852   }
853 }
854 
/// \brief Dispatch emission of an 'omp atomic' construct on the clause that
/// selects the operation form. Only the 'read' form is implemented so far.
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, const Expr *X, const Expr *V,
                              const Expr *, SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
  case OMPC_update:
  case OMPC_capture:
    // Recognized atomic forms whose lowering is not implemented yet.
    llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet.");
  // The remaining clause kinds are not expected on 'omp atomic'; listing
  // them explicitly keeps the switch fully covered so that adding a new
  // clause kind produces a compiler warning here.
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_mergeable:
  case OMPC_unknown:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
894 
895 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
896   bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
897   OpenMPClauseKind Kind = OMPC_unknown;
898   for (auto *C : S.clauses()) {
899     // Find first clause (skip seq_cst clause, if it is first).
900     if (C->getClauseKind() != OMPC_seq_cst) {
901       Kind = C->getClauseKind();
902       break;
903     }
904   }
905   EmitOMPAtomicExpr(*this, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
906                     S.getLocStart());
907 }
908 
/// \brief Placeholder for '#pragma omp target'. Lowering is not implemented
/// yet.
void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
  llvm_unreachable("CodeGen for 'omp target' is not supported yet.");
}
912 
/// \brief Placeholder for '#pragma omp teams'. Lowering is not implemented
/// yet.
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
}
916 
917