1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  /// Emit the pre-init declarations attached to the directive's clauses:
  /// helper variables (e.g. captured clause expressions) that must be
  /// materialized before the directive body is emitted.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              // OMPCaptureNoInitAttr: allocate storage and register cleanups,
              // but do not emit the variable's initializer here.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  // Private scope used to remap captured variables when the construct is
  // emitted inlined into the current function.
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD is captured in the current context (lambda
  /// capture, captured statement, or when emitting inside a block).
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  /// \param AsInlined If true, remap variables captured by the associated
  ///        statement so they are addressed directly in this function.
  /// \param EmitPreInitStmt If true, emit clause pre-init declarations first.
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                  bool AsInlined = false, bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (AsInlined) {
      if (S.hasAssociatedStmt()) {
        auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
        for (auto &C : CS->captures()) {
          if (C.capturesVariable() || C.capturesVariableByCopy()) {
            auto *VD = C.getCapturedVar();
            // Build a reference to the captured variable; EmitLValue below
            // resolves it to the correct address in the current context.
            // The DRE is captured by reference in the lambda, which is only
            // invoked while this scope (and thus the DRE) is alive.
            DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                            isCapturedVar(CGF, VD) ||
                                (CGF.CapturedStmtInfo &&
                                 InlinedShareds.isGlobalVarCaptured(VD)),
                            VD->getType().getNonReferenceType(), VK_LValue,
                            SourceLocation());
            InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
              return CGF.EmitLValue(&DRE).getAddress();
            });
          }
        }
        (void)InlinedShareds.Privatize();
      }
    }
  }
};
84 
85 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
86 /// for captured expressions.
87 class OMPParallelScope final : public OMPLexicalScope {
88   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
89     OpenMPDirectiveKind Kind = S.getDirectiveKind();
90     return !(isOpenMPTargetExecutionDirective(Kind) ||
91              isOpenMPLoopBoundSharingDirective(Kind)) &&
92            isOpenMPParallelDirective(Kind);
93   }
94 
95 public:
96   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
97       : OMPLexicalScope(CGF, S,
98                         /*AsInlined=*/false,
99                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
100 };
101 
102 /// Lexical scope for OpenMP teams construct, that handles correct codegen
103 /// for captured expressions.
104 class OMPTeamsScope final : public OMPLexicalScope {
105   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
106     OpenMPDirectiveKind Kind = S.getDirectiveKind();
107     return !isOpenMPTargetExecutionDirective(Kind) &&
108            isOpenMPTeamsDirective(Kind);
109   }
110 
111 public:
112   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
113       : OMPLexicalScope(CGF, S,
114                         /*AsInlined=*/false,
115                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
116 };
117 
118 /// Private scope for OpenMP loop-based directives, that supports capturing
119 /// of used expression from loop statement.
120 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
121   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
122     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
123       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
124         for (const auto *I : PreInits->decls())
125           CGF.EmitVarDecl(cast<VarDecl>(*I));
126       }
127     }
128   }
129 
130 public:
131   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
132       : CodeGenFunction::RunCleanupsScope(CGF) {
133     emitPreInitStmt(CGF, S);
134   }
135 };
136 
137 } // namespace
138 
/// Returns the size in bytes of \p Ty as an llvm::Value. For variably
/// modified (VLA) types the size is computed at runtime by multiplying the
/// dynamic dimension sizes with the element size.
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    // Peel off all VLA dimensions, accumulating the product of their runtime
    // extents into Size; Ty ends up as the innermost (non-VLA) element type.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    // Total size = product of the VLA extents * static element size.
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}
158 
/// Collects, in \p CapturedVars, the values to pass to the outlined function
/// for each capture of \p S: VLA sizes, 'this', by-copy values (coerced
/// through a uintptr temporary when not pointer-typed), and addresses for
/// by-reference captures.
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  // Walk capture-init expressions, record fields, and captures in lockstep.
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      // Pass the previously computed dynamic size for this VLA dimension.
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV =
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        // View the uintptr temporary as a pointer to the field's type so the
        // value can be stored with its own representation.
        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), SourceLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}
205 
206 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
207                                     StringRef Name, LValue AddrLV,
208                                     bool isReferenceType = false) {
209   ASTContext &Ctx = CGF.getContext();
210 
211   auto *CastedPtr = CGF.EmitScalarConversion(
212       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
213       Ctx.getPointerType(DstType), SourceLocation());
214   auto TmpAddr =
215       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
216           .getAddress();
217 
218   // If we are dealing with references we need to return the address of the
219   // reference instead of the reference of the value.
220   if (isReferenceType) {
221     QualType RefType = Ctx.getLValueReferenceType(DstType);
222     auto *RefVal = TmpAddr.getPointer();
223     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
224     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
225     CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
226   }
227 
228   return TmpAddr;
229 }
230 
231 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
232   if (T->isLValueReferenceType()) {
233     return C.getLValueReferenceType(
234         getCanonicalParamType(C, T.getNonReferenceType()),
235         /*SpelledAsLValue=*/false);
236   }
237   if (T->isPointerType())
238     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
239   return C.getCanonicalParamType(T);
240 }
241 
242 llvm::Function *
243 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
244   assert(
245       CapturedStmtInfo &&
246       "CapturedStmtInfo should be set when generating the captured function");
247   const CapturedDecl *CD = S.getCapturedDecl();
248   const RecordDecl *RD = S.getCapturedRecordDecl();
249   assert(CD->hasBody() && "missing CapturedDecl body");
250 
251   // Build the argument list.
252   ASTContext &Ctx = CGM.getContext();
253   FunctionArgList Args;
254   Args.append(CD->param_begin(),
255               std::next(CD->param_begin(), CD->getContextParamPosition()));
256   auto I = S.captures().begin();
257   for (auto *FD : RD->fields()) {
258     QualType ArgType = FD->getType();
259     IdentifierInfo *II = nullptr;
260     VarDecl *CapVar = nullptr;
261 
262     // If this is a capture by copy and the type is not a pointer, the outlined
263     // function argument type should be uintptr and the value properly casted to
264     // uintptr. This is necessary given that the runtime library is only able to
265     // deal with pointers. We can pass in the same way the VLA type sizes to the
266     // outlined function.
267     if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
268         I->capturesVariableArrayType())
269       ArgType = Ctx.getUIntPtrType();
270 
271     if (I->capturesVariable() || I->capturesVariableByCopy()) {
272       CapVar = I->getCapturedVar();
273       II = CapVar->getIdentifier();
274     } else if (I->capturesThis())
275       II = &getContext().Idents.get("this");
276     else {
277       assert(I->capturesVariableArrayType());
278       II = &getContext().Idents.get("vla");
279     }
280     if (ArgType->isVariablyModifiedType()) {
281       ArgType =
282           getCanonicalParamType(getContext(), ArgType.getNonReferenceType());
283     }
284     Args.push_back(ImplicitParamDecl::Create(getContext(), /*DC=*/nullptr,
285                                              FD->getLocation(), II, ArgType,
286                                              ImplicitParamDecl::Other));
287     ++I;
288   }
289   Args.append(
290       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
291       CD->param_end());
292 
293   // Create the function declaration.
294   FunctionType::ExtInfo ExtInfo;
295   const CGFunctionInfo &FuncInfo =
296       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
297   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
298 
299   llvm::Function *F = llvm::Function::Create(
300       FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
301       CapturedStmtInfo->getHelperName(), &CGM.getModule());
302   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
303   if (CD->isNothrow())
304     F->addFnAttr(llvm::Attribute::NoUnwind);
305 
306   // Generate the function.
307   StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
308                 CD->getBody()->getLocStart());
309   unsigned Cnt = CD->getContextParamPosition();
310   I = S.captures().begin();
311   for (auto *FD : RD->fields()) {
312     // If we are capturing a pointer by copy we don't need to do anything, just
313     // use the value that we get from the arguments.
314     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
315       const VarDecl *CurVD = I->getCapturedVar();
316       Address LocalAddr = GetAddrOfLocalVar(Args[Cnt]);
317       // If the variable is a reference we need to materialize it here.
318       if (CurVD->getType()->isReferenceType()) {
319         Address RefAddr = CreateMemTemp(CurVD->getType(), getPointerAlign(),
320                                         ".materialized_ref");
321         EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, /*Volatile=*/false,
322                           CurVD->getType());
323         LocalAddr = RefAddr;
324       }
325       setAddrOfLocalVar(CurVD, LocalAddr);
326       ++Cnt;
327       ++I;
328       continue;
329     }
330 
331     LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
332     LValue ArgLVal =
333         MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
334                        BaseInfo);
335     if (FD->hasCapturedVLAType()) {
336       LValue CastedArgLVal =
337           MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
338                                               Args[Cnt]->getName(), ArgLVal),
339                          FD->getType(), BaseInfo);
340       auto *ExprArg =
341           EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
342       auto VAT = FD->getCapturedVLAType();
343       VLASizeMap[VAT->getSizeExpr()] = ExprArg;
344     } else if (I->capturesVariable()) {
345       auto *Var = I->getCapturedVar();
346       QualType VarTy = Var->getType();
347       Address ArgAddr = ArgLVal.getAddress();
348       if (!VarTy->isReferenceType()) {
349         if (ArgLVal.getType()->isLValueReferenceType()) {
350           ArgAddr = EmitLoadOfReference(
351               ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
352         } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
353           assert(ArgLVal.getType()->isPointerType());
354           ArgAddr = EmitLoadOfPointer(
355               ArgAddr, ArgLVal.getType()->castAs<PointerType>());
356         }
357       }
358       setAddrOfLocalVar(
359           Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
360     } else if (I->capturesVariableByCopy()) {
361       assert(!FD->getType()->isAnyPointerType() &&
362              "Not expecting a captured pointer.");
363       auto *Var = I->getCapturedVar();
364       QualType VarTy = Var->getType();
365       setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(),
366                                                   Args[Cnt]->getName(), ArgLVal,
367                                                   VarTy->isReferenceType()));
368     } else {
369       // If 'this' is captured, load it into CXXThisValue.
370       assert(I->capturesThis());
371       CXXThisValue =
372           EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
373     }
374     ++Cnt;
375     ++I;
376   }
377 
378   PGO.assignRegionCounters(GlobalDecl(CD), F);
379   CapturedStmtInfo->EmitBody(*this, CD->getBody());
380   FinishFunction(CD->getBodyRBrace());
381 
382   return F;
383 }
384 
385 //===----------------------------------------------------------------------===//
386 //                              OpenMP Directive Emission
387 //===----------------------------------------------------------------------===//
/// Emits a loop that copies an array element by element from \p SrcAddr to
/// \p DestAddr. \p OriginalType is the array type; \p CopyGen is invoked with
/// (dest, src) element addresses to emit the actual per-element copy.
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current source/destination element pointers across
  // loop iterations.
  llvm::PHINode *SrcElementPHI =
    Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI =
    Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
    Address(DestElementPHI,
            DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Incoming values come from whatever block CopyGen left us in.
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
448 
449 /// Check if the combiner is a call to UDR combiner and if it is so return the
450 /// UDR decl used for reduction.
451 static const OMPDeclareReductionDecl *
452 getReductionInit(const Expr *ReductionOp) {
453   if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
454     if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
455       if (auto *DRE =
456               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
457         if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
458           return DRD;
459   return nullptr;
460 }
461 
/// Emits initialization of \p Private from the user-defined reduction
/// initializer when \p DRD provides one; otherwise initializes \p Private
/// from a zero-initialized constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    auto *CE = cast<CallExpr>(InitOp);
    auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // The call's two arguments are address-of expressions over variable
    // references; remap those variables to the private and original
    // addresses before emitting the call.
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() -> Address { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() -> Address { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the initializer function (second element of
    // the pair returned by getUserDefinedReduction) and emit the call.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No UDR initializer: materialize a null constant of type Ty in a private
    // global, then copy it into Private according to Ty's evaluation kind.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, ".init");
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
510 
/// \brief Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array; only used when \p Init is a
/// UDR combiner call (i.e. a declare-reduction initializer is involved).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, const Expr *Init,
                                 Address SrcAddr = Address::invalid()) {
  auto *DRD = getReductionInit(Init);
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  // The source pointer is only tracked when a UDR initializer may read the
  // original element.
  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (DRD && (DRD->getInitializer() || !Init)) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name below says "dest" although this GEP
    // advances the *source* element pointer — looks like a copy-paste;
    // cosmetic (IR names only), left unchanged.
    auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
598 
599 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
600                                   Address SrcAddr, const VarDecl *DestVD,
601                                   const VarDecl *SrcVD, const Expr *Copy) {
602   if (OriginalType->isArrayType()) {
603     auto *BO = dyn_cast<BinaryOperator>(Copy);
604     if (BO && BO->getOpcode() == BO_Assign) {
605       // Perform simple memcpy for simple copying.
606       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
607     } else {
608       // For arrays with complex element types perform element by element
609       // copying.
610       EmitOMPAggregateAssign(
611           DestAddr, SrcAddr, OriginalType,
612           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
613             // Working with the single array element, so have to remap
614             // destination and source variables to corresponding array
615             // elements.
616             CodeGenFunction::OMPPrivateScope Remap(*this);
617             Remap.addPrivate(DestVD, [DestElement]() -> Address {
618               return DestElement;
619             });
620             Remap.addPrivate(
621                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
622             (void)Remap.Privatize();
623             EmitIgnoredExpr(Copy);
624           });
625     }
626   } else {
627     // Remap pseudo source variable to private copy.
628     CodeGenFunction::OMPPrivateScope Remap(*this);
629     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
630     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
631     (void)Remap.Privatize();
632     // Emit copying of the whole variable.
633     EmitIgnoredExpr(Copy);
634   }
635 }
636 
/// Emits privatization for the 'firstprivate' clauses of \p D into
/// \p PrivateScope: each private copy is allocated and copy-initialized from
/// the original variable. Returns true iff at least one emitted firstprivate
/// variable also appears in a 'lastprivate' clause.
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  // Collect every variable that also appears in a 'lastprivate' clause.
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    // Iterate variable refs, init expressions, and private copies in
    // lockstep.
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *CapFD = CapturesInfo.lookup(OrigVD);
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      // A non-reference variable captured by copy (and not also lastprivate)
      // is already effectively private in the outlined function; no separate
      // private copy is needed.
      if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      // Emit each variable only once even if listed in several clauses.
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              // Non-trivial element construction: copy element by element,
              // remapping the init helper variable to each source element.
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable
            // (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
729 
730 void CodeGenFunction::EmitOMPPrivateClause(
731     const OMPExecutableDirective &D,
732     CodeGenFunction::OMPPrivateScope &PrivateScope) {
733   if (!HaveInsertPoint())
734     return;
735   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
736   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
737     auto IRef = C->varlist_begin();
738     for (auto IInit : C->private_copies()) {
739       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
740       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
741         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
742         bool IsRegistered =
743             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
744               // Emit private VarDecl with copy init.
745               EmitDecl(*VD);
746               return GetAddrOfLocalVar(VD);
747             });
748         assert(IsRegistered && "private var already registered as private");
749         // Silence the warning about unused variable.
750         (void)IsRegistered;
751       }
752       ++IRef;
753     }
754   }
755 }
756 
/// Emits the copy code for 'copyin' clause variables: each worker thread's
/// local instance of a threadprivate variable is initialized from the master
/// thread's instance. Returns true if any copying code was emitted, in which
/// case the caller must emit a synchronizing barrier afterwards.
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Generated code has the form:
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    // The vars/sources/destinations/assignment-ops iterators advance in
    // lockstep; each position describes one copyin variable.
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      // Copy each variable only once, even if listed in several clauses.
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          // Drop the mapping so subsequent references resolve to the
          // thread-local instance rather than the captured master address.
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
            Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                        : CGM.GetAddrOfGlobal(VD),
                    getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // Master and private addresses compare equal only on the master
          // thread, so the copy region is skipped there.
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
822 
/// Emits initialization code for 'lastprivate' clause variables: registers
/// the address of each original variable (for the final copy-back) and emits
/// a private copy unless the variable is also firstprivate or is a SIMD loop
/// control variable. Returns true if the directive \p D carries at least one
/// 'lastprivate' clause.
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  // Collect loop control variables of simd directives: their privatization is
  // handled by the simd codegen, so no copies are emitted for them here.
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization, it is done in
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // The lambda runs lazily at Privatize() time; it captures the iterator
        // IRef by value at its current position.
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
880 
/// Emits the final copy-back for 'lastprivate' clause variables: values of
/// the private copies from the logically last iteration are assigned back to
/// the original variables. If \p IsLastIterCond is non-null the copies are
/// guarded by that condition; if \p NoFinals is true, loop counters are not
/// updated to their final values before the copy.
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    // Record final-value expressions of loop counters so a lastprivate loop
    // counter can be brought to its post-loop value before copy-back; when
    // NoFinals is set, counters are excluded from copy-back entirely.
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        // Reference-typed privates hold a pointer; load through it to reach
        // the actual private storage.
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
950 
951 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
952                           LValue BaseLV, llvm::Value *Addr) {
953   Address Tmp = Address::invalid();
954   Address TopTmp = Address::invalid();
955   Address MostTopTmp = Address::invalid();
956   BaseTy = BaseTy.getNonReferenceType();
957   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
958          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
959     Tmp = CGF.CreateMemTemp(BaseTy);
960     if (TopTmp.isValid())
961       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
962     else
963       MostTopTmp = Tmp;
964     TopTmp = Tmp;
965     BaseTy = BaseTy->getPointeeType();
966   }
967   llvm::Type *Ty = BaseLV.getPointer()->getType();
968   if (Tmp.isValid())
969     Ty = Tmp.getElementType();
970   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
971   if (Tmp.isValid()) {
972     CGF.Builder.CreateStore(Addr, Tmp);
973     return MostTopTmp;
974   }
975   return Address(Addr, BaseLV.getAlignment());
976 }
977 
978 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
979                           LValue BaseLV) {
980   BaseTy = BaseTy.getNonReferenceType();
981   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
982          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
983     if (auto *PtrTy = BaseTy->getAs<PointerType>())
984       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
985     else {
986       BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
987                                              BaseTy->castAs<ReferenceType>());
988     }
989     BaseTy = BaseTy->getPointeeType();
990   }
991   return CGF.MakeAddrLValue(
992       Address(
993           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
994               BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
995           BaseLV.getAlignment()),
996       BaseLV.getType(), BaseLV.getBaseInfo());
997 }
998 
/// Emits initialization for 'reduction' clause variables: creates a private
/// copy of each reduction variable initialized with the reduction identity
/// (or a user-declared reduction initializer when DRD is present), and maps
/// the LHS/RHS helper variables to the original/private storage so the
/// combiner expressions can use them. Handles three shapes of list items:
/// array sections, array subscripts, and whole variables.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // These four iterators advance in lockstep with the varlist below.
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    for (auto IRef : C->varlists()) {
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
      // DRD is non-null for user-defined ('declare reduction') reductions.
      auto *DRD = getReductionInit(*IRed);
      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
        // Case 1: reduction over an array section. Privatize only the
        // [LB, UB] slice and remap the base so accesses land in the private
        // buffer.
        // Strip nested sections/subscripts to find the base variable.
        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
          Base = TempOASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
        auto OASELValueUB =
            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue =
            loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
                        OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [OASELValueLB]() -> Address {
          return OASELValueLB.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
                     OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit VarDecl with copy init for arrays.
              // Get the address of the original variable captured in current
              // captured region.
              // Section length = UB - LB + 1 elements; bind it to the VLA
              // size expression of the private copy's type.
              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
                                                 OASELValueLB.getPointer());
              Size = Builder.CreateNUWAdd(
                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(Size));
              EmitVariablyModifiedType(PrivateVD->getType());
              auto Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              auto *Init = PrivateVD->getInit();
              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                   DRD ? *IRed : Init,
                                   OASELValueLB.getAddress());
              EmitAutoVarCleanups(Emission);
              // Emit private VarDecl with reduction init.
              // Offset the private buffer so that indexing with the original
              // base expression hits the correct element, then rebuild the
              // original indirection chain around it.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   OASELValueLB.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(),
                                OASELValueLB.getType(), OriginalBaseLValue,
                                Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
        // Case 2: reduction over a single array element. Privatize just that
        // element and remap the base analogously to the section case.
        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto ASELValue = EmitLValue(ASE);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue = loadToBegin(
            *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(
            LHSVD, [ASELValue]() -> Address { return ASELValue.getAddress(); });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
                     OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit private VarDecl with reduction init.
              AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                // Use the user-declared reduction initializer.
                emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                 ASELValue.getAddress(),
                                                 ASELValue.getType());
              } else
                EmitAutoVarInit(Emission);
              EmitAutoVarCleanups(Emission);
              // Rebase so the original subscript expression addresses the
              // private element.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   ASELValue.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
                                OriginalBaseLValue, Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
          return Builder.CreateElementBitCast(
              GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
              "rhs.begin");
        });
      } else {
        // Case 3: reduction over a whole variable (array or scalar).
        auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        if (getContext().getAsArrayType(Type)) {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          IRef->getType(), VK_LValue, IRef->getExprLoc());
          Address OriginalAddr = EmitLValue(&DRE).getAddress();
          PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
                                          LHSVD]() -> Address {
            OriginalAddr = Builder.CreateElementBitCast(
                OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
            return OriginalAddr;
          });
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            if (Type->isVariablyModifiedType()) {
              // Bind the VLA size expression to the original array's size
              // before emitting the private copy.
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(
                      getTypeSize(OrigVD->getType().getNonReferenceType())));
              EmitVariablyModifiedType(Type);
            }
            auto Emission = EmitAutoVarAlloca(*PrivateVD);
            auto Addr = Emission.getAllocatedAddress();
            auto *Init = PrivateVD->getInit();
            EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                 DRD ? *IRed : Init, OriginalAddr);
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
            return Builder.CreateElementBitCast(
                GetAddrOfLocalVar(PrivateVD),
                ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
          });
        } else {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          Address OriginalAddr = Address::invalid();
          PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
                                          &OriginalAddr]() -> Address {
            DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                            CapturedStmtInfo->lookup(OrigVD) != nullptr,
                            IRef->getType(), VK_LValue, IRef->getExprLoc());
            OriginalAddr = EmitLValue(&DRE).getAddress();
            return OriginalAddr;
          });
          // Emit reduction copy.
          bool IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
                // Emit private VarDecl with reduction init.
                AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
                auto Addr = Emission.getAllocatedAddress();
                if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                  // Use the user-declared reduction initializer.
                  emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                   OriginalAddr,
                                                   PrivateVD->getType());
                } else
                  EmitAutoVarInit(Emission);
                EmitAutoVarCleanups(Emission);
                return Addr;
              });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
            return GetAddrOfLocalVar(PrivateVD);
          });
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
      ++IRed;
    }
  }
}
1200 
1201 void CodeGenFunction::EmitOMPReductionClauseFinal(
1202     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1203   if (!HaveInsertPoint())
1204     return;
1205   llvm::SmallVector<const Expr *, 8> Privates;
1206   llvm::SmallVector<const Expr *, 8> LHSExprs;
1207   llvm::SmallVector<const Expr *, 8> RHSExprs;
1208   llvm::SmallVector<const Expr *, 8> ReductionOps;
1209   bool HasAtLeastOneReduction = false;
1210   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1211     HasAtLeastOneReduction = true;
1212     Privates.append(C->privates().begin(), C->privates().end());
1213     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1214     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1215     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1216   }
1217   if (HasAtLeastOneReduction) {
1218     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1219                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1220                       D.getDirectiveKind() == OMPD_simd;
1221     bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
1222     // Emit nowait reduction if nowait clause is present or directive is a
1223     // parallel directive (it always has implicit barrier).
1224     CGM.getOpenMPRuntime().emitReduction(
1225         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1226         {WithNowait, SimpleReduction, ReductionKind});
1227   }
1228 }
1229 
1230 static void emitPostUpdateForReductionClause(
1231     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1232     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1233   if (!CGF.HaveInsertPoint())
1234     return;
1235   llvm::BasicBlock *DoneBB = nullptr;
1236   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1237     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1238       if (!DoneBB) {
1239         if (auto *Cond = CondGen(CGF)) {
1240           // If the first post-update expression is found, emit conditional
1241           // block if it was requested.
1242           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1243           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1244           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1245           CGF.EmitBlock(ThenBB);
1246         }
1247       }
1248       CGF.EmitIgnoredExpr(PostUpdate);
1249     }
1250   }
1251   if (DoneBB)
1252     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1253 }
1254 
namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for', where the inner 'for' chunking must operate on
/// the bounds of the enclosing 'distribute' chunk. The callback appends the
/// extra arguments to the captured-variable list passed to the outlined
/// function.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace
1264 
/// Common codegen for 'parallel'-based directives: outlines the parallel
/// region via \p CodeGen, emits num_threads/proc_bind clause handling, finds
/// the 'if' condition applying to the parallel region, and emits the runtime
/// call that forks the outlined function. \p CodeGenBoundParameters appends
/// extra arguments (distribute bounds) for combined constructs.
static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  // Pick the first 'if' clause that applies to 'parallel' (either with an
  // explicit 'parallel' name modifier or with no modifier at all).
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}
1304 
/// No-op CodeGenBoundParametersTy implementation, used by directives that do
/// not need to pass additional bound parameters to the outlined function
/// (i.e. anything that is not a combined 'distribute parallel for'-style
/// construct).
static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
1308 
1309 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1310   // Emit parallel region as a standalone region.
1311   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1312     OMPPrivateScope PrivateScope(CGF);
1313     bool Copyins = CGF.EmitOMPCopyinClause(S);
1314     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1315     if (Copyins) {
1316       // Emit implicit barrier to synchronize threads and avoid data races on
1317       // propagation master's thread values of threadprivate variables to local
1318       // instances of that variables of all other implicit threads.
1319       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1320           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1321           /*ForceSimpleCall=*/true);
1322     }
1323     CGF.EmitOMPPrivateClause(S, PrivateScope);
1324     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1325     (void)PrivateScope.Privatize();
1326     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1327     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1328   };
1329   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1330                                  emitEmptyBoundParameters);
1331   emitPostUpdateForReductionClause(
1332       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1333 }
1334 
/// Emit one iteration of an OpenMP loop directive's body: update the loop
/// counters and linear variables for the current iteration, then emit the
/// user's body with 'continue' routed through a dedicated block so that
/// cleanups run before control reaches the next iteration.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *U : C->updates())
      EmitIgnoredExpr(U);
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}
1357 
/// Emit the innermost "while (LoopCond) { Body; Inc; }" structure shared by
/// all OpenMP loop directives. \p BodyGen emits the loop body, \p PostIncGen
/// emits code after the increment (e.g. ordered-loop bookkeeping).
/// \p RequiresCleanup forces a staging block on the exit edge so pending
/// cleanups are run before leaving the loop.
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
1406 
1407 void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1408   if (!HaveInsertPoint())
1409     return;
1410   // Emit inits for the linear variables.
1411   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1412     for (auto *Init : C->inits()) {
1413       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1414       if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1415         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1416         auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1417         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1418                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1419                         VD->getInit()->getType(), VK_LValue,
1420                         VD->getInit()->getExprLoc());
1421         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1422                                                 VD->getType()),
1423                        /*capturedByInit=*/false);
1424         EmitAutoVarCleanups(Emission);
1425       } else
1426         EmitVarDecl(*VD);
1427     }
1428     // Emit the linear steps for the linear clauses.
1429     // If a step is not constant, it is pre-calculated before the loop.
1430     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1431       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1432         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1433         // Emit calculation of the linear step.
1434         EmitIgnoredExpr(CS);
1435       }
1436   }
1437 }
1438 
/// Emit the final copy-out for 'linear' clause variables: each original
/// variable receives the value computed by the clause's 'final' expression.
/// \p CondGen may return a condition; if it does (checked once, at the first
/// final expression), the whole copy-out is guarded by a conditional block.
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      // The 'final' expression refers to the variable; privatize it to the
      // original variable's address so the store lands on the original.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1476 
/// Emit llvm.assume-based alignment assumptions for every pointer listed in
/// the directive's 'aligned' clauses. A clause-provided alignment must be a
/// constant expression; if absent, the target's default SIMD alignment for the
/// pointee type is used.
static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      // Sema guarantees the alignment expression is an ICE, so it folds to a
      // ConstantInt here.
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined default
        // alignments for SIMD instructions on the target platforms are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}
1509 
/// Register private copies of the loop counters of directive \p S in
/// \p LoopScope. Each original counter is mapped to an uninitialized private
/// variable; when the original is itself live in this function (local,
/// captured, or global), the private decl is additionally mapped back to the
/// original's address.
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  // counters() and private_counters() are parallel lists; walk them together.
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    (void)LoopScope.addPrivate(VD, [&]() -> Address {
      // Emit var without initialization.
      if (!LocalDeclMap.count(PrivateVD)) {
        auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
        EmitAutoVarCleanups(VarEmission);
      }
      DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      (*I)->getType(), VK_LValue, (*I)->getExprLoc());
      return EmitLValue(&DRE).getAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    }
    ++I;
  }
}
1541 
1542 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1543                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1544                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1545   if (!CGF.HaveInsertPoint())
1546     return;
1547   {
1548     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1549     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1550     (void)PreCondScope.Privatize();
1551     // Get initial values of real counters.
1552     for (auto I : S.inits()) {
1553       CGF.EmitIgnoredExpr(I);
1554     }
1555   }
1556   // Check that loop is executed at least one time.
1557   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1558 }
1559 
/// Emit private copies for variables from 'linear' clauses and register them
/// in \p PrivateScope. On simd directives, linear variables that are also loop
/// counters are skipped for registration (the counter privatization owns the
/// mapping) but their private decl is still emitted.
void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  // Collect the canonical decls of the loop counters for simd directives.
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    // varlists() and privates() are parallel lists; walk them together.
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else
        EmitVarDecl(*PrivateVD);
      ++CurPrivate;
    }
  }
}
1593 
1594 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1595                                      const OMPExecutableDirective &D,
1596                                      bool IsMonotonic) {
1597   if (!CGF.HaveInsertPoint())
1598     return;
1599   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1600     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1601                                  /*ignoreResult=*/true);
1602     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1603     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1604     // In presence of finite 'safelen', it may be unsafe to mark all
1605     // the memory instructions parallel, because loop-carried
1606     // dependences of 'safelen' iterations are possible.
1607     if (!IsMonotonic)
1608       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1609   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1610     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1611                                  /*ignoreResult=*/true);
1612     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1613     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1614     // In presence of finite 'safelen', it may be unsafe to mark all
1615     // the memory instructions parallel, because loop-carried
1616     // dependences of 'safelen' iterations are possible.
1617     CGF.LoopStack.setParallel(false);
1618   }
1619 }
1620 
/// Initialize loop metadata for a simd region: mark the loop parallel (unless
/// the schedule is monotonic), enable vectorization, and apply any
/// simdlen/safelen clauses (which may refine or override the parallel flag).
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}
1628 
/// Emit final updates of the loop counters after a simd region, copying the
/// last value of each private counter back to the original variable when the
/// original is live in this function. \p CondGen may return a condition that
/// guards the whole copy-out in a conditional block (checked at the first
/// emitted update).
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // counters(), private_counters() and finals() are parallel lists.
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      // For captured-expression decls, the address comes from the captured
      // init expression; otherwise from the private counter itself.
      Address OrigAddr = Address::invalid();
      if (CED)
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      else {
        DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      // The 'final' expression refers to the counter; redirect it to OrigAddr
      // so the store updates the original variable.
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1674 
/// Emit the loop body followed by a debug stop point; matches the
/// CodeGenLoopTy callback signature used by the outer-loop emitters.
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}
1681 
/// Emit code for '#pragma omp simd': an inlined loop guarded by the
/// precondition, with simd loop metadata, privatization for linear/private/
/// reduction/lastprivate clauses, and final counter/linear copy-outs.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPLoopScope PreInitScope(CGF, S);
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Mark the loop for vectorization before emitting any of its blocks.
    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    CGF.EmitOMPLinearClauseInit(S);
    {
      // Privatize counters and clause variables for the duration of the loop.
      OMPPrivateScope LoopScope(CGF);
      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
      CGF.EmitOMPLinearClause(S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      bool HasLastprivateClause =
          CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      CGF.EmitOMPSimdFinal(
          S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
      // Emit final copy of the lastprivate variables at the end of loops.
      if (HasLastprivateClause)
        CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
      CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
      emitPostUpdateForReductionClause(
          CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    }
    CGF.EmitOMPLinearClauseFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
1763 
/// Emit the outer dispatch loop shared by worksharing and distribute
/// directives: repeatedly obtain a chunk (statically via EUB/Init/Cond
/// expressions, or dynamically via the runtime's dispatch_next), run the
/// inner loop over it through \p CodeGenLoop, and advance the bounds.
/// \p CodeGenOrdered emits per-iteration ordered-loop bookkeeping.
void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  auto &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    // Ask the runtime for the next chunk; it returns false when exhausted.
    BoolCondVal =
        RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();

  // when 'distribute' is not combined with a 'for':
  // while (idx <= UB) { BODY; ++idx; }
  // when 'distribute' is combined with a 'for'
  // (e.g. 'distribute parallel for')
  // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
  EmitOMPInnerLoop(
      S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
      [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
        CodeGenLoop(CGF, S, LoopExit);
      },
      [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
        CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}
1867 
1868 void CodeGenFunction::EmitOMPForOuterLoop(
1869     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1870     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1871     const OMPLoopArguments &LoopArgs,
1872     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1873   auto &RT = CGM.getOpenMPRuntime();
1874 
1875   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1876   const bool DynamicOrOrdered =
1877       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1878 
1879   assert((Ordered ||
1880           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1881                                  LoopArgs.Chunk != nullptr)) &&
1882          "static non-chunked schedule does not need outer loop");
1883 
1884   // Emit outer loop.
1885   //
1886   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1887   // When schedule(dynamic,chunk_size) is specified, the iterations are
1888   // distributed to threads in the team in chunks as the threads request them.
1889   // Each thread executes a chunk of iterations, then requests another chunk,
1890   // until no chunks remain to be distributed. Each chunk contains chunk_size
1891   // iterations, except for the last chunk to be distributed, which may have
1892   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1893   //
1894   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1895   // to threads in the team in chunks as the executing threads request them.
1896   // Each thread executes a chunk of iterations, then requests another chunk,
1897   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1898   // each chunk is proportional to the number of unassigned iterations divided
1899   // by the number of threads in the team, decreasing to 1. For a chunk_size
1900   // with value k (greater than 1), the size of each chunk is determined in the
1901   // same way, with the restriction that the chunks do not contain fewer than k
1902   // iterations (except for the last chunk to be assigned, which may have fewer
1903   // than k iterations).
1904   //
1905   // When schedule(auto) is specified, the decision regarding scheduling is
1906   // delegated to the compiler and/or runtime system. The programmer gives the
1907   // implementation the freedom to choose any possible mapping of iterations to
1908   // threads in the team.
1909   //
1910   // When schedule(runtime) is specified, the decision regarding scheduling is
1911   // deferred until run time, and the schedule and chunk size are taken from the
1912   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1913   // implementation defined
1914   //
1915   // while(__kmpc_dispatch_next(&LB, &UB)) {
1916   //   idx = LB;
1917   //   while (idx <= UB) { BODY; ++idx;
1918   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1919   //   } // inner loop
1920   // }
1921   //
1922   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1923   // When schedule(static, chunk_size) is specified, iterations are divided into
1924   // chunks of size chunk_size, and the chunks are assigned to the threads in
1925   // the team in a round-robin fashion in the order of the thread number.
1926   //
1927   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1928   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1929   //   LB = LB + ST;
1930   //   UB = UB + ST;
1931   // }
1932   //
1933 
1934   const Expr *IVExpr = S.getIterationVariable();
1935   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1936   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1937 
1938   if (DynamicOrOrdered) {
1939     auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1940     llvm::Value *LBVal = DispatchBounds.first;
1941     llvm::Value *UBVal = DispatchBounds.second;
1942     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
1943                                                              LoopArgs.Chunk};
1944     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1945                            IVSigned, Ordered, DipatchRTInputValues);
1946   } else {
1947     RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
1948                          Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1949                          LoopArgs.ST, LoopArgs.Chunk);
1950   }
1951 
1952   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1953                                     const unsigned IVSize,
1954                                     const bool IVSigned) {
1955     if (Ordered) {
1956       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1957                                                             IVSigned);
1958     }
1959   };
1960 
1961   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1962                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1963   OuterLoopArgs.IncExpr = S.getInc();
1964   OuterLoopArgs.Init = S.getInit();
1965   OuterLoopArgs.Cond = S.getCond();
1966   OuterLoopArgs.NextLB = S.getNextLowerBound();
1967   OuterLoopArgs.NextUB = S.getNextUpperBound();
1968   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1969                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1970 }
1971 
/// No-op CodeGenOrderedTy callback for loops that have no 'ordered' clause.
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}
1974 
1975 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1976     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1977     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1978     const CodeGenLoopTy &CodeGenLoopContent) {
1979 
1980   auto &RT = CGM.getOpenMPRuntime();
1981 
1982   // Emit outer loop.
1983   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1984   // dynamic
1985   //
1986 
1987   const Expr *IVExpr = S.getIterationVariable();
1988   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1989   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1990 
1991   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1992                               IVSigned, /* Ordered = */ false, LoopArgs.IL,
1993                               LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1994                               LoopArgs.Chunk);
1995 
1996   // for combined 'distribute' and 'for' the increment expression of distribute
1997   // is store in DistInc. For 'distribute' alone, it is in Inc.
1998   Expr *IncExpr;
1999   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
2000     IncExpr = S.getDistInc();
2001   else
2002     IncExpr = S.getInc();
2003 
2004   // this routine is shared by 'omp distribute parallel for' and
2005   // 'omp distribute': select the right EUB expression depending on the
2006   // directive
2007   OMPLoopArguments OuterLoopArgs;
2008   OuterLoopArgs.LB = LoopArgs.LB;
2009   OuterLoopArgs.UB = LoopArgs.UB;
2010   OuterLoopArgs.ST = LoopArgs.ST;
2011   OuterLoopArgs.IL = LoopArgs.IL;
2012   OuterLoopArgs.Chunk = LoopArgs.Chunk;
2013   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2014                           ? S.getCombinedEnsureUpperBound()
2015                           : S.getEnsureUpperBound();
2016   OuterLoopArgs.IncExpr = IncExpr;
2017   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2018                            ? S.getCombinedInit()
2019                            : S.getInit();
2020   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2021                            ? S.getCombinedCond()
2022                            : S.getCond();
2023   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2024                              ? S.getCombinedNextLowerBound()
2025                              : S.getNextLowerBound();
2026   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2027                              ? S.getCombinedNextUpperBound()
2028                              : S.getNextUpperBound();
2029 
2030   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2031                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
2032                    emitEmptyOrdered);
2033 }
2034 
2035 /// Emit a helper variable and return corresponding lvalue.
2036 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2037                                const DeclRefExpr *Helper) {
2038   auto VDecl = cast<VarDecl>(Helper->getDecl());
2039   CGF.EmitVarDecl(*VDecl);
2040   return CGF.EmitLValue(Helper);
2041 }
2042 
/// Emit the inner 'for' loop bounds for a combined 'distribute parallel for':
/// create the LB/UB helper variables and initialize them from the previous
/// ('distribute') schedule's bounds, converted to the iteration variable's
/// type. Returns the {LB, UB} lvalues.
static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into the
  // the current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
  // The previous bounds and the iteration variable may differ in type;
  // convert before storing.
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(), SourceLocation());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(), SourceLocation());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}
2074 
2075 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
2076 /// we need to use the LB and UB expressions generated by the worksharing
2077 /// code generation support, whereas in non combined situations we would
2078 /// just emit 0 and the LastIteration expression
2079 /// This function is necessary due to the difference of the LB and UB
2080 /// types for the RT emission routines for 'for_static_init' and
2081 /// 'for_dispatch_init'
2082 static std::pair<llvm::Value *, llvm::Value *>
2083 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2084                                         const OMPExecutableDirective &S,
2085                                         Address LB, Address UB) {
2086   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2087   const Expr *IVExpr = LS.getIterationVariable();
2088   // when implementing a dynamic schedule for a 'for' combined with a
2089   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2090   // is not normalized as each team only executes its own assigned
2091   // distribute chunk
2092   QualType IteratorTy = IVExpr->getType();
2093   llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
2094                                             SourceLocation());
2095   llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
2096                                             SourceLocation());
2097   return {LBVal, UBVal};
2098 }
2099 
2100 static void emitDistributeParallelForDistributeInnerBoundParams(
2101     CodeGenFunction &CGF, const OMPExecutableDirective &S,
2102     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
2103   const auto &Dir = cast<OMPLoopDirective>(S);
2104   LValue LB =
2105       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
2106   auto LBCast = CGF.Builder.CreateIntCast(
2107       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2108   CapturedVars.push_back(LBCast);
2109   LValue UB =
2110       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2111 
2112   auto UBCast = CGF.Builder.CreateIntCast(
2113       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2114   CapturedVars.push_back(UBCast);
2115 }
2116 
2117 static void
2118 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2119                                  const OMPLoopDirective &S,
2120                                  CodeGenFunction::JumpDest LoopExit) {
2121   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2122                                          PrePostActionTy &) {
2123     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2124                                emitDistributeParallelForInnerBounds,
2125                                emitDistributeParallelForDispatchBounds);
2126   };
2127 
2128   emitCommonOMPParallelDirective(
2129       CGF, S, OMPD_for, CGInlinedWorksharingLoop,
2130       emitDistributeParallelForDistributeInnerBoundParams);
2131 }
2132 
2133 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2134     const OMPDistributeParallelForDirective &S) {
2135   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2136     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2137                               S.getDistInc());
2138   };
2139   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2140   OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for,
2141                                   /*HasCancel=*/false);
2142   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
2143                                               /*HasCancel=*/false);
2144 }
2145 
2146 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2147     const OMPDistributeParallelForSimdDirective &S) {
2148   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2149   CGM.getOpenMPRuntime().emitInlinedDirective(
2150       *this, OMPD_distribute_parallel_for_simd,
2151       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2152         OMPLoopScope PreInitScope(CGF, S);
2153         CGF.EmitStmt(
2154             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2155       });
2156 }
2157 
2158 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2159     const OMPDistributeSimdDirective &S) {
2160   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2161   CGM.getOpenMPRuntime().emitInlinedDirective(
2162       *this, OMPD_distribute_simd,
2163       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2164         OMPLoopScope PreInitScope(CGF, S);
2165         CGF.EmitStmt(
2166             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2167       });
2168 }
2169 
2170 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
2171     const OMPTargetParallelForSimdDirective &S) {
2172   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2173   CGM.getOpenMPRuntime().emitInlinedDirective(
2174       *this, OMPD_target_parallel_for_simd,
2175       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2176         OMPLoopScope PreInitScope(CGF, S);
2177         CGF.EmitStmt(
2178             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2179       });
2180 }
2181 
2182 void CodeGenFunction::EmitOMPTargetSimdDirective(
2183     const OMPTargetSimdDirective &S) {
2184   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2185   CGM.getOpenMPRuntime().emitInlinedDirective(
2186       *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2187         OMPLoopScope PreInitScope(CGF, S);
2188         CGF.EmitStmt(
2189             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2190       });
2191 }
2192 
2193 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
2194     const OMPTeamsDistributeDirective &S) {
2195   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2196   CGM.getOpenMPRuntime().emitInlinedDirective(
2197       *this, OMPD_teams_distribute,
2198       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2199         OMPLoopScope PreInitScope(CGF, S);
2200         CGF.EmitStmt(
2201             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2202       });
2203 }
2204 
2205 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
2206     const OMPTeamsDistributeSimdDirective &S) {
2207   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2208   CGM.getOpenMPRuntime().emitInlinedDirective(
2209       *this, OMPD_teams_distribute_simd,
2210       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2211         OMPLoopScope PreInitScope(CGF, S);
2212         CGF.EmitStmt(
2213             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2214       });
2215 }
2216 
2217 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
2218     const OMPTeamsDistributeParallelForSimdDirective &S) {
2219   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2220   CGM.getOpenMPRuntime().emitInlinedDirective(
2221       *this, OMPD_teams_distribute_parallel_for_simd,
2222       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2223         OMPLoopScope PreInitScope(CGF, S);
2224         CGF.EmitStmt(
2225             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2226       });
2227 }
2228 
2229 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
2230     const OMPTeamsDistributeParallelForDirective &S) {
2231   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2232   CGM.getOpenMPRuntime().emitInlinedDirective(
2233       *this, OMPD_teams_distribute_parallel_for,
2234       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2235         OMPLoopScope PreInitScope(CGF, S);
2236         CGF.EmitStmt(
2237             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2238       });
2239 }
2240 
2241 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
2242     const OMPTargetTeamsDistributeDirective &S) {
2243   CGM.getOpenMPRuntime().emitInlinedDirective(
2244       *this, OMPD_target_teams_distribute,
2245       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2246         CGF.EmitStmt(
2247             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2248       });
2249 }
2250 
2251 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
2252     const OMPTargetTeamsDistributeParallelForDirective &S) {
2253   CGM.getOpenMPRuntime().emitInlinedDirective(
2254       *this, OMPD_target_teams_distribute_parallel_for,
2255       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2256         CGF.EmitStmt(
2257             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2258       });
2259 }
2260 
2261 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
2262     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
2263   CGM.getOpenMPRuntime().emitInlinedDirective(
2264       *this, OMPD_target_teams_distribute_parallel_for_simd,
2265       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2266         CGF.EmitStmt(
2267             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2268       });
2269 }
2270 
2271 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
2272     const OMPTargetTeamsDistributeSimdDirective &S) {
2273   CGM.getOpenMPRuntime().emitInlinedDirective(
2274       *this, OMPD_target_teams_distribute_simd,
2275       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2276         CGF.EmitStmt(
2277             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2278       });
2279 }
2280 
namespace {
  /// Bundles an OpenMP schedule clause kind together with its two optional
  /// schedule modifiers.
  struct ScheduleKindModifiersTy {
    OpenMPScheduleClauseKind Kind;
    OpenMPScheduleClauseModifier M1;
    OpenMPScheduleClauseModifier M2;
    ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                            OpenMPScheduleClauseModifier M1,
                            OpenMPScheduleClauseModifier M2)
        : Kind(Kind), M1(M1), M2(M2) {}
  };
} // namespace
2292 
2293 bool CodeGenFunction::EmitOMPWorksharingLoop(
2294     const OMPLoopDirective &S, Expr *EUB,
2295     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2296     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2297   // Emit the loop iteration variable.
2298   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2299   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
2300   EmitVarDecl(*IVDecl);
2301 
2302   // Emit the iterations count variable.
2303   // If it is not a variable, Sema decided to calculate iterations count on each
2304   // iteration (e.g., it is foldable into a constant).
2305   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2306     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2307     // Emit calculation of the iterations count.
2308     EmitIgnoredExpr(S.getCalcLastIteration());
2309   }
2310 
2311   auto &RT = CGM.getOpenMPRuntime();
2312 
2313   bool HasLastprivateClause;
2314   // Check pre-condition.
2315   {
2316     OMPLoopScope PreInitScope(*this, S);
2317     // Skip the entire loop if we don't meet the precondition.
2318     // If the condition constant folds and can be elided, avoid emitting the
2319     // whole loop.
2320     bool CondConstant;
2321     llvm::BasicBlock *ContBlock = nullptr;
2322     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2323       if (!CondConstant)
2324         return false;
2325     } else {
2326       auto *ThenBlock = createBasicBlock("omp.precond.then");
2327       ContBlock = createBasicBlock("omp.precond.end");
2328       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2329                   getProfileCount(&S));
2330       EmitBlock(ThenBlock);
2331       incrementProfileCounter(&S);
2332     }
2333 
2334     bool Ordered = false;
2335     if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2336       if (OrderedClause->getNumForLoops())
2337         RT.emitDoacrossInit(*this, S);
2338       else
2339         Ordered = true;
2340     }
2341 
2342     llvm::DenseSet<const Expr *> EmittedFinals;
2343     emitAlignedClause(*this, S);
2344     EmitOMPLinearClauseInit(S);
2345     // Emit helper vars inits.
2346 
2347     std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
2348     LValue LB = Bounds.first;
2349     LValue UB = Bounds.second;
2350     LValue ST =
2351         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2352     LValue IL =
2353         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2354 
2355     // Emit 'then' code.
2356     {
2357       OMPPrivateScope LoopScope(*this);
2358       if (EmitOMPFirstprivateClause(S, LoopScope)) {
2359         // Emit implicit barrier to synchronize threads and avoid data races on
2360         // initialization of firstprivate variables and post-update of
2361         // lastprivate variables.
2362         CGM.getOpenMPRuntime().emitBarrierCall(
2363             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2364             /*ForceSimpleCall=*/true);
2365       }
2366       EmitOMPPrivateClause(S, LoopScope);
2367       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2368       EmitOMPReductionClauseInit(S, LoopScope);
2369       EmitOMPPrivateLoopCounters(S, LoopScope);
2370       EmitOMPLinearClause(S, LoopScope);
2371       (void)LoopScope.Privatize();
2372 
2373       // Detect the loop schedule kind and chunk.
2374       llvm::Value *Chunk = nullptr;
2375       OpenMPScheduleTy ScheduleKind;
2376       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
2377         ScheduleKind.Schedule = C->getScheduleKind();
2378         ScheduleKind.M1 = C->getFirstScheduleModifier();
2379         ScheduleKind.M2 = C->getSecondScheduleModifier();
2380         if (const auto *Ch = C->getChunkSize()) {
2381           Chunk = EmitScalarExpr(Ch);
2382           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
2383                                        S.getIterationVariable()->getType(),
2384                                        S.getLocStart());
2385         }
2386       }
2387       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2388       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2389       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2390       // If the static schedule kind is specified or if the ordered clause is
2391       // specified, and if no monotonic modifier is specified, the effect will
2392       // be as if the monotonic modifier was specified.
2393       if (RT.isStaticNonchunked(ScheduleKind.Schedule,
2394                                 /* Chunked */ Chunk != nullptr) &&
2395           !Ordered) {
2396         if (isOpenMPSimdDirective(S.getDirectiveKind()))
2397           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
2398         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2399         // When no chunk_size is specified, the iteration space is divided into
2400         // chunks that are approximately equal in size, and at most one chunk is
2401         // distributed to each thread. Note that the size of the chunks is
2402         // unspecified in this case.
2403         RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
2404                              IVSize, IVSigned, Ordered,
2405                              IL.getAddress(), LB.getAddress(),
2406                              UB.getAddress(), ST.getAddress());
2407         auto LoopExit =
2408             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2409         // UB = min(UB, GlobalUB);
2410         EmitIgnoredExpr(S.getEnsureUpperBound());
2411         // IV = LB;
2412         EmitIgnoredExpr(S.getInit());
2413         // while (idx <= UB) { BODY; ++idx; }
2414         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
2415                          S.getInc(),
2416                          [&S, LoopExit](CodeGenFunction &CGF) {
2417                            CGF.EmitOMPLoopBody(S, LoopExit);
2418                            CGF.EmitStopPoint(&S);
2419                          },
2420                          [](CodeGenFunction &) {});
2421         EmitBlock(LoopExit.getBlock());
2422         // Tell the runtime we are done.
2423         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2424           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
2425         };
2426         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2427       } else {
2428         const bool IsMonotonic =
2429             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2430             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2431             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2432             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2433         // Emit the outer loop, which requests its work chunk [LB..UB] from
2434         // runtime and runs the inner loop to process it.
2435         const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
2436                                              ST.getAddress(), IL.getAddress(),
2437                                              Chunk, EUB);
2438         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2439                             LoopArguments, CGDispatchBounds);
2440       }
2441       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2442         EmitOMPSimdFinal(S,
2443                          [&](CodeGenFunction &CGF) -> llvm::Value * {
2444                            return CGF.Builder.CreateIsNotNull(
2445                                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2446                          });
2447       }
2448       EmitOMPReductionClauseFinal(
2449           S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
2450                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
2451                  : /*Parallel only*/ OMPD_parallel);
2452       // Emit post-update of the reduction variables if IsLastIter != 0.
2453       emitPostUpdateForReductionClause(
2454           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2455             return CGF.Builder.CreateIsNotNull(
2456                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2457           });
2458       // Emit final copy of the lastprivate variables if IsLastIter != 0.
2459       if (HasLastprivateClause)
2460         EmitOMPLastprivateClauseFinal(
2461             S, isOpenMPSimdDirective(S.getDirectiveKind()),
2462             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
2463     }
2464     EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2465       return CGF.Builder.CreateIsNotNull(
2466           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2467     });
2468     // We're now done with the loop, so jump to the continuation block.
2469     if (ContBlock) {
2470       EmitBranch(ContBlock);
2471       EmitBlock(ContBlock, true);
2472     }
2473   }
2474   return HasLastprivateClause;
2475 }
2476 
2477 /// The following two functions generate expressions for the loop lower
2478 /// and upper bounds in case of static and dynamic (dispatch) schedule
2479 /// of the associated 'for' or 'distribute' loop.
2480 static std::pair<LValue, LValue>
2481 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2482   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2483   LValue LB =
2484       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2485   LValue UB =
2486       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2487   return {LB, UB};
2488 }
2489 
2490 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2491 /// consider the lower and upper bound expressions generated by the
2492 /// worksharing loop support, but we use 0 and the iteration space size as
2493 /// constants
2494 static std::pair<llvm::Value *, llvm::Value *>
2495 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2496                           Address LB, Address UB) {
2497   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2498   const Expr *IVExpr = LS.getIterationVariable();
2499   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2500   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2501   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2502   return {LBVal, UBVal};
2503 }
2504 
2505 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2506   bool HasLastprivates = false;
2507   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2508                                           PrePostActionTy &) {
2509     OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2510     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2511                                                  emitForLoopBounds,
2512                                                  emitDispatchForLoopBounds);
2513   };
2514   {
2515     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2516     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2517                                                 S.hasCancel());
2518   }
2519 
2520   // Emit an implicit barrier at the end.
2521   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2522     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2523   }
2524 }
2525 
2526 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2527   bool HasLastprivates = false;
2528   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2529                                           PrePostActionTy &) {
2530     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2531                                                  emitForLoopBounds,
2532                                                  emitDispatchForLoopBounds);
2533   };
2534   {
2535     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2536     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2537   }
2538 
2539   // Emit an implicit barrier at the end.
2540   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2541     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2542   }
2543 }
2544 
2545 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2546                                 const Twine &Name,
2547                                 llvm::Value *Init = nullptr) {
2548   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2549   if (Init)
2550     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2551   return LVal;
2552 }
2553 
/// Emit the body of a 'sections'/'parallel sections' construct: the section
/// list is lowered to a statically scheduled worksharing loop whose iteration
/// variable selects the section to run through a switch statement.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  // CS is null when the associated statement is a single (non-compound)
  // statement, i.e. there is exactly one implicit section.
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Sections are numbered 0..N-1, so the global upper bound is N-1 (0 for
    // the single-statement case).
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Build opaque value expressions for IV and UB so the synthetic
    // condition (IV <= UB) and increment (++IV) below can reference them.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single non-compound statement: emit it as the only section (case 0).
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32,
        /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
        UB.getAddress(), ST.getAddress());
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}
2691 
2692 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2693   {
2694     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2695     EmitSections(S);
2696   }
2697   // Emit an implicit barrier at the end.
2698   if (!S.getSingleClause<OMPNowaitClause>()) {
2699     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2700                                            OMPD_sections);
2701   }
2702 }
2703 
2704 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2705   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2706     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2707   };
2708   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2709   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2710                                               S.hasCancel());
2711 }
2712 
2713 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2714   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2715   llvm::SmallVector<const Expr *, 8> DestExprs;
2716   llvm::SmallVector<const Expr *, 8> SrcExprs;
2717   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2718   // Check if there are any 'copyprivate' clauses associated with this
2719   // 'single' construct.
2720   // Build a list of copyprivate variables along with helper expressions
2721   // (<source>, <destination>, <destination>=<source> expressions)
2722   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2723     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2724     DestExprs.append(C->destination_exprs().begin(),
2725                      C->destination_exprs().end());
2726     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2727     AssignmentOps.append(C->assignment_ops().begin(),
2728                          C->assignment_ops().end());
2729   }
2730   // Emit code for 'single' region along with 'copyprivate' clauses
2731   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2732     Action.Enter(CGF);
2733     OMPPrivateScope SingleScope(CGF);
2734     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2735     CGF.EmitOMPPrivateClause(S, SingleScope);
2736     (void)SingleScope.Privatize();
2737     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2738   };
2739   {
2740     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2741     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2742                                             CopyprivateVars, DestExprs,
2743                                             SrcExprs, AssignmentOps);
2744   }
2745   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2746   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2747   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2748     CGM.getOpenMPRuntime().emitBarrierCall(
2749         *this, S.getLocStart(),
2750         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2751   }
2752 }
2753 
2754 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2755   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2756     Action.Enter(CGF);
2757     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2758   };
2759   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2760   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2761 }
2762 
2763 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2764   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2765     Action.Enter(CGF);
2766     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2767   };
2768   Expr *Hint = nullptr;
2769   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2770     Hint = HintClause->getHint();
2771   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2772   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2773                                             S.getDirectiveName().getAsString(),
2774                                             CodeGen, S.getLocStart(), Hint);
2775 }
2776 
2777 void CodeGenFunction::EmitOMPParallelForDirective(
2778     const OMPParallelForDirective &S) {
2779   // Emit directive as a combined directive that consists of two implicit
2780   // directives: 'parallel' with 'for' directive.
2781   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2782     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2783     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2784                                emitDispatchForLoopBounds);
2785   };
2786   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2787                                  emitEmptyBoundParameters);
2788 }
2789 
2790 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2791     const OMPParallelForSimdDirective &S) {
2792   // Emit directive as a combined directive that consists of two implicit
2793   // directives: 'parallel' with 'for' directive.
2794   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2795     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2796                                emitDispatchForLoopBounds);
2797   };
2798   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2799                                  emitEmptyBoundParameters);
2800 }
2801 
2802 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2803     const OMPParallelSectionsDirective &S) {
2804   // Emit directive as a combined directive that consists of two implicit
2805   // directives: 'parallel' with 'sections' directive.
2806   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2807     CGF.EmitSections(S);
2808   };
2809   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2810                                  emitEmptyBoundParameters);
2811 }
2812 
/// Common codegen for task-generating directives ('task', 'taskloop', ...):
/// harvests clause data into \p Data, outlines the task body built by
/// \p BodyGen, and finally calls \p TaskGen to emit the runtime invocation.
void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
                                                const RegionCodeGenTy &BodyGen,
                                                const TaskGenTy &TaskGen,
                                                OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto *I = CS->getCapturedDecl()->param_begin();
  // Captured-function parameter layout: the part id is the second parameter
  // and the task descriptor is the fifth (see the runtime task entry ABI).
  auto *PartId = std::next(I);
  auto *TaskT = std::next(I, 4);
  // Check if the task is final
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    auto *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    // The priority value is converted to a signed 32-bit integer.
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Only record each variable once even if it appears in several clauses.
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      // Map destination decl back to its original reference so the final
      // copy-out can find the source address.
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (auto *IRef : C->varlists())
      Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
  auto &&CodeGen = [&Data, CS, &BodyGen, &LastprivateDstsOrigs](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      // Parameters 2/3 of the captured function carry the privates block and
      // the runtime-generated copy function that fills in their addresses.
      auto *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (auto *E : Data.PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.LastprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      // The copy function stores the address of each private copy into the
      // corresponding *.ptr.addr temporary.
      CGF.EmitRuntimeCall(CopyFn, CallArgs);
      for (auto &&Pair : LastprivateDstsOrigs) {
        auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    (void)Scope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}
2964 
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Pick the 'if' condition that applies to 'task': either an unmodified
  // 'if' clause or one carrying the 'task' name modifier.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  // The task body simply emits the captured statement.
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  // Callback that emits the actual runtime task call once the body has been
  // outlined by EmitOMPTaskBasedDirective.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}
2994 
void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  // '#pragma omp taskyield' lowers directly to a runtime taskyield call.
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}
2999 
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  // '#pragma omp barrier' lowers directly to an explicit runtime barrier.
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}
3003 
void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  // '#pragma omp taskwait' lowers directly to a runtime taskwait call.
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
}
3007 
3008 void CodeGenFunction::EmitOMPTaskgroupDirective(
3009     const OMPTaskgroupDirective &S) {
3010   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3011     Action.Enter(CGF);
3012     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3013   };
3014   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3015   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
3016 }
3017 
3018 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
3019   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
3020     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
3021       return llvm::makeArrayRef(FlushClause->varlist_begin(),
3022                                 FlushClause->varlist_end());
3023     }
3024     return llvm::None;
3025   }(), S.getLocStart());
3026 }
3027 
/// Emit the loop of a (possibly combined) 'distribute' directive.
/// \p CodeGenLoop emits the loop body; \p IncExpr is the iteration-variable
/// increment used by the generated inner loop.
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits.
      // For directives that share loop bounds with an inner worksharing
      // construct (e.g. 'distribute parallel for'), use the combined bounds.
      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
          *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          // Convert the chunk to the iteration variable's type.
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
          S.getIterationVariable()->getType(),
          S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        // Static non-chunked: each team gets a single contiguous chunk, so
        // the whole chunk can be processed with one static-init/fini pair.
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                             IVSize, IVSigned, /* Ordered = */ false,
                             IL.getAddress(), LB.getAddress(),
                             UB.getAddress(), ST.getAddress());
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                         ? S.getCombinedCond()
                         : S.getCond();

        // for distribute alone,  codegen
        // while (idx <= UB) { BODY; ++idx; }
        // when combined with 'for' (e.g. as in 'distribute parallel for')
        // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
                         [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                           CodeGenLoop(CGF, S, LoopExit);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }

      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(
                EmitLoadOfScalar(IL, S.getLocStart())));
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}
3182 
3183 void CodeGenFunction::EmitOMPDistributeDirective(
3184     const OMPDistributeDirective &S) {
3185   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3186 
3187     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3188   };
3189   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3190   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
3191                                               false);
3192 }
3193 
3194 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3195                                                    const CapturedStmt *S) {
3196   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3197   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3198   CGF.CapturedStmtInfo = &CapStmtInfo;
3199   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3200   Fn->addFnAttr(llvm::Attribute::NoInline);
3201   return Fn;
3202 }
3203 
3204 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
3205   if (!S.getAssociatedStmt()) {
3206     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
3207       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
3208     return;
3209   }
3210   auto *C = S.getSingleClause<OMPSIMDClause>();
3211   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
3212                                  PrePostActionTy &Action) {
3213     if (C) {
3214       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3215       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3216       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3217       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
3218       CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
3219     } else {
3220       Action.Enter(CGF);
3221       CGF.EmitStmt(
3222           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3223     }
3224   };
3225   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3226   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
3227 }
3228 
3229 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3230                                          QualType SrcType, QualType DestType,
3231                                          SourceLocation Loc) {
3232   assert(CGF.hasScalarEvaluationKind(DestType) &&
3233          "DestType must have scalar evaluation kind.");
3234   assert(!Val.isAggregate() && "Must be a scalar or complex.");
3235   return Val.isScalar()
3236              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
3237                                         Loc)
3238              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
3239                                                  DestType, Loc);
3240 }
3241 
3242 static CodeGenFunction::ComplexPairTy
3243 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3244                       QualType DestType, SourceLocation Loc) {
3245   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3246          "DestType must have complex evaluation kind.");
3247   CodeGenFunction::ComplexPairTy ComplexVal;
3248   if (Val.isScalar()) {
3249     // Convert the input element to the element type of the complex.
3250     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3251     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3252                                               DestElementType, Loc);
3253     ComplexVal = CodeGenFunction::ComplexPairTy(
3254         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3255   } else {
3256     assert(Val.isComplex() && "Must be a scalar or complex.");
3257     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3258     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3259     ComplexVal.first = CGF.EmitScalarConversion(
3260         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3261     ComplexVal.second = CGF.EmitScalarConversion(
3262         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3263   }
3264   return ComplexVal;
3265 }
3266 
3267 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3268                                   LValue LVal, RValue RVal) {
3269   if (LVal.isGlobalReg()) {
3270     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3271   } else {
3272     CGF.EmitAtomicStore(RVal, LVal,
3273                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3274                                  : llvm::AtomicOrdering::Monotonic,
3275                         LVal.isVolatile(), /*IsInit=*/false);
3276   }
3277 }
3278 
/// Store \p RVal (of type \p RValTy) into \p LVal, first converting it to the
/// lvalue's type. Used by the atomic constructs for the non-atomic 'v = x'
/// part; aggregates are not valid in 'omp atomic' expressions.
void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}
3296 
3297 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3298                                   const Expr *X, const Expr *V,
3299                                   SourceLocation Loc) {
3300   // v = x;
3301   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3302   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3303   LValue XLValue = CGF.EmitLValue(X);
3304   LValue VLValue = CGF.EmitLValue(V);
3305   RValue Res = XLValue.isGlobalReg()
3306                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
3307                    : CGF.EmitAtomicLoad(
3308                          XLValue, Loc,
3309                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3310                                   : llvm::AtomicOrdering::Monotonic,
3311                          XLValue.isVolatile());
3312   // OpenMP, 2.12.6, atomic Construct
3313   // Any atomic construct with a seq_cst clause forces the atomically
3314   // performed operation to include an implicit flush operation without a
3315   // list.
3316   if (IsSeqCst)
3317     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3318   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3319 }
3320 
3321 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3322                                    const Expr *X, const Expr *E,
3323                                    SourceLocation Loc) {
3324   // x = expr;
3325   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3326   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3327   // OpenMP, 2.12.6, atomic Construct
3328   // Any atomic construct with a seq_cst clause forces the atomically
3329   // performed operation to include an implicit flush operation without a
3330   // list.
3331   if (IsSeqCst)
3332     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3333 }
3334 
/// Try to lower an atomic update of 'X' with 'Update' to a single LLVM
/// 'atomicrmw' instruction with ordering \p AO.
///
/// \param IsXLHSInRHSPart true for the 'x binop expr' form, false for
///        'expr binop x' (only commutative operations support the latter).
/// \returns {true, old value of 'x'} on success; {false, empty RValue} when
///          the caller must fall back to a compare-and-swap sequence.
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
  // expression is simple and atomic is allowed for the given type for the
  // target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  // Map the source-level operator to the atomicrmw operation.
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    // Subtraction is not commutative: only 'x - expr' maps to atomicrmw sub;
    // 'expr - x' needs the CAS fallback.
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  // '<'/'>' come from min/max-style updates; pick (U)Min/(U)Max based on the
  // signedness of 'x' and on which side of the comparison 'x' appears.
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  // No integer atomicrmw equivalent exists for these; fall back to CAS.
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  // These opcodes never form a valid atomic update expression.
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  // Constant updates may have a different width than 'x'; cast to match,
  // respecting the signedness of 'x'.
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
3426 
3427 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3428     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3429     llvm::AtomicOrdering AO, SourceLocation Loc,
3430     const llvm::function_ref<RValue(RValue)> &CommonGen) {
3431   // Update expressions are allowed to have the following forms:
3432   // x binop= expr; -> xrval + expr;
3433   // x++, ++x -> xrval + 1;
3434   // x--, --x -> xrval - 1;
3435   // x = x binop expr; -> xrval binop expr
3436   // x = expr Op x; - > expr binop xrval;
3437   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3438   if (!Res.first) {
3439     if (X.isGlobalReg()) {
3440       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3441       // 'xrval'.
3442       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3443     } else {
3444       // Perform compare-and-swap procedure.
3445       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3446     }
3447   }
3448   return Res;
3449 }
3450 
3451 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3452                                     const Expr *X, const Expr *E,
3453                                     const Expr *UE, bool IsXLHSInRHSPart,
3454                                     SourceLocation Loc) {
3455   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3456          "Update expr in 'atomic update' must be a binary operator.");
3457   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3458   // Update expressions are allowed to have the following forms:
3459   // x binop= expr; -> xrval + expr;
3460   // x++, ++x -> xrval + 1;
3461   // x--, --x -> xrval - 1;
3462   // x = x binop expr; -> xrval binop expr
3463   // x = expr Op x; - > expr binop xrval;
3464   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3465   LValue XLValue = CGF.EmitLValue(X);
3466   RValue ExprRValue = CGF.EmitAnyExpr(E);
3467   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3468                      : llvm::AtomicOrdering::Monotonic;
3469   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3470   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3471   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3472   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3473   auto Gen =
3474       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3475         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3476         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3477         return CGF.EmitAnyExpr(UE);
3478       };
3479   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3480       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3481   // OpenMP, 2.12.6, atomic Construct
3482   // Any atomic construct with a seq_cst clause forces the atomically
3483   // performed operation to include an implicit flush operation without a
3484   // list.
3485   if (IsSeqCst)
3486     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3487 }
3488 
3489 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3490                             QualType SourceType, QualType ResType,
3491                             SourceLocation Loc) {
3492   switch (CGF.getEvaluationKind(ResType)) {
3493   case TEK_Scalar:
3494     return RValue::get(
3495         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3496   case TEK_Complex: {
3497     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3498     return RValue::getComplex(Res.first, Res.second);
3499   }
3500   case TEK_Aggregate:
3501     break;
3502   }
3503   llvm_unreachable("Must be a scalar or complex.");
3504 }
3505 
/// Emit '#pragma omp atomic capture': atomically update (or overwrite) 'X'
/// and store either its old value (postfix forms, e.g. 'v = x++;') or its
/// new value (prefix forms) into 'V'.
///
/// \param UE the update expression, or null when 'x' is simply overwritten
///        with 'E' ('v = x = expr' style).
static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  // Value stored to 'v'; filled in by one of the generators below.
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; - > expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    // Captures NewVVal by reference: evaluating UE also records the value
    // that must later be stored into 'v'.
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      // Postfix capture stores the old 'x'; prefix stores the updated value.
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
3586 
/// Dispatch codegen for an 'omp atomic' directive based on the clause that
/// selects the atomic operation. OMPC_unknown (no clause present) is treated
/// as 'update'.
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  // No other clause can select the operation of an 'omp atomic' construct.
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
3654 
3655 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3656   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3657   OpenMPClauseKind Kind = OMPC_unknown;
3658   for (auto *C : S.clauses()) {
3659     // Find first clause (skip seq_cst clause, if it is first).
3660     if (C->getClauseKind() != OMPC_seq_cst) {
3661       Kind = C->getClauseKind();
3662       break;
3663     }
3664   }
3665 
3666   const auto *CS =
3667       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
3668   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3669     enterFullExpression(EWC);
3670   }
3671   // Processing for statements under 'atomic capture'.
3672   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3673     for (const auto *C : Compound->body()) {
3674       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3675         enterFullExpression(EWC);
3676       }
3677     }
3678   }
3679 
3680   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3681                                             PrePostActionTy &) {
3682     CGF.EmitStopPoint(CS);
3683     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3684                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3685                       S.isXLHSInRHSPart(), S.getLocStart());
3686   };
3687   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3688   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3689 }
3690 
/// Common codegen for target directives: outline the region, then emit the
/// host-side call that launches it (or runs the host fallback).
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;
  const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());

  // Filled in by emitTargetOutlinedFunction below.
  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the at most one if clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
    Device = C->getDevice();
  }

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        CapturedVars);
}
3750 
3751 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
3752                              PrePostActionTy &Action) {
3753   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3754   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3755   CGF.EmitOMPPrivateClause(S, PrivateScope);
3756   (void)PrivateScope.Privatize();
3757 
3758   Action.Enter(CGF);
3759   CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3760 }
3761 
3762 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
3763                                                   StringRef ParentName,
3764                                                   const OMPTargetDirective &S) {
3765   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3766     emitTargetRegion(CGF, S, Action);
3767   };
3768   llvm::Function *Fn;
3769   llvm::Constant *Addr;
3770   // Emit target region as a standalone region.
3771   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3772       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3773   assert(Fn && Addr && "Target device function emission failed.");
3774 }
3775 
3776 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3777   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3778     emitTargetRegion(CGF, S, Action);
3779   };
3780   emitCommonOMPTargetDirective(*this, S, CodeGen);
3781 }
3782 
3783 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3784                                         const OMPExecutableDirective &S,
3785                                         OpenMPDirectiveKind InnermostKind,
3786                                         const RegionCodeGenTy &CodeGen) {
3787   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
3788   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
3789       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3790 
3791   const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
3792   const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
3793   if (NT || TL) {
3794     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
3795     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
3796 
3797     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
3798                                                   S.getLocStart());
3799   }
3800 
3801   OMPTeamsScope Scope(CGF, S);
3802   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3803   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3804   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
3805                                            CapturedVars);
3806 }
3807 
3808 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
3809   // Emit teams region as a standalone region.
3810   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3811     OMPPrivateScope PrivateScope(CGF);
3812     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3813     CGF.EmitOMPPrivateClause(S, PrivateScope);
3814     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3815     (void)PrivateScope.Privatize();
3816     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3817     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3818   };
3819   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3820   emitPostUpdateForReductionClause(
3821       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
3822 }
3823 
3824 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
3825                                   const OMPTargetTeamsDirective &S) {
3826   auto *CS = S.getCapturedStmt(OMPD_teams);
3827   Action.Enter(CGF);
3828   auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
3829     // TODO: Add support for clauses.
3830     CGF.EmitStmt(CS->getCapturedStmt());
3831   };
3832   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
3833 }
3834 
3835 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
3836     CodeGenModule &CGM, StringRef ParentName,
3837     const OMPTargetTeamsDirective &S) {
3838   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3839     emitTargetTeamsRegion(CGF, Action, S);
3840   };
3841   llvm::Function *Fn;
3842   llvm::Constant *Addr;
3843   // Emit target region as a standalone region.
3844   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3845       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3846   assert(Fn && Addr && "Target device function emission failed.");
3847 }
3848 
3849 void CodeGenFunction::EmitOMPTargetTeamsDirective(
3850     const OMPTargetTeamsDirective &S) {
3851   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3852     emitTargetTeamsRegion(CGF, Action, S);
3853   };
3854   emitCommonOMPTargetDirective(*this, S, CodeGen);
3855 }
3856 
3857 void CodeGenFunction::EmitOMPCancellationPointDirective(
3858     const OMPCancellationPointDirective &S) {
3859   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
3860                                                    S.getCancelRegion());
3861 }
3862 
3863 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
3864   const Expr *IfCond = nullptr;
3865   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3866     if (C->getNameModifier() == OMPD_unknown ||
3867         C->getNameModifier() == OMPD_cancel) {
3868       IfCond = C->getCondition();
3869       break;
3870     }
3871   }
3872   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
3873                                         S.getCancelRegion());
3874 }
3875 
3876 CodeGenFunction::JumpDest
3877 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
3878   if (Kind == OMPD_parallel || Kind == OMPD_task ||
3879       Kind == OMPD_target_parallel)
3880     return ReturnBlock;
3881   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
3882          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
3883          Kind == OMPD_distribute_parallel_for ||
3884          Kind == OMPD_target_parallel_for);
3885   return OMPCancelStack.getExitBlock();
3886 }
3887 
/// Privatize the variables of a 'use_device_ptr' clause, initializing each
/// private copy with the device address recorded in \p CaptureDeviceAddrMap.
void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  // The original vars, their initializers, and their private copies are
  // parallel lists; advance all three iterators in lock-step.
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (auto PvtVarIt : C.private_copies()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privatize fields of the current
      // structure.
      auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it is initialized from the
      // initialization variable we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable served its purpose in the emission
      // of the previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}
3949 
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device pointer.
  // This action can be replaced by the OpenMP runtime code generation to
  // deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    // Running the action flips the flag checked inside PrivCodeGen below.
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else
        RCG(CGF);
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Notwithstanding the body of the region is emitted as inlined directive,
    // we don't use an inline scope as changes in the references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
4039 
4040 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
4041     const OMPTargetEnterDataDirective &S) {
4042   // If we don't have target devices, don't bother emitting the data mapping
4043   // code.
4044   if (CGM.getLangOpts().OMPTargetTriples.empty())
4045     return;
4046 
4047   // Check if we have any if clause associated with the directive.
4048   const Expr *IfCond = nullptr;
4049   if (auto *C = S.getSingleClause<OMPIfClause>())
4050     IfCond = C->getCondition();
4051 
4052   // Check if we have any device clause associated with the directive.
4053   const Expr *Device = nullptr;
4054   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4055     Device = C->getDevice();
4056 
4057   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4058 }
4059 
4060 void CodeGenFunction::EmitOMPTargetExitDataDirective(
4061     const OMPTargetExitDataDirective &S) {
4062   // If we don't have target devices, don't bother emitting the data mapping
4063   // code.
4064   if (CGM.getLangOpts().OMPTargetTriples.empty())
4065     return;
4066 
4067   // Check if we have any if clause associated with the directive.
4068   const Expr *IfCond = nullptr;
4069   if (auto *C = S.getSingleClause<OMPIfClause>())
4070     IfCond = C->getCondition();
4071 
4072   // Check if we have any device clause associated with the directive.
4073   const Expr *Device = nullptr;
4074   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4075     Device = C->getDevice();
4076 
4077   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4078 }
4079 
4080 static void emitTargetParallelRegion(CodeGenFunction &CGF,
4081                                      const OMPTargetParallelDirective &S,
4082                                      PrePostActionTy &Action) {
4083   // Get the captured statement associated with the 'parallel' region.
4084   auto *CS = S.getCapturedStmt(OMPD_parallel);
4085   Action.Enter(CGF);
4086   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
4087     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4088     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4089     CGF.EmitOMPPrivateClause(S, PrivateScope);
4090     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4091     (void)PrivateScope.Privatize();
4092     // TODO: Add support for clauses.
4093     CGF.EmitStmt(CS->getCapturedStmt());
4094     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4095   };
4096   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
4097                                  emitEmptyBoundParameters);
4098   emitPostUpdateForReductionClause(
4099       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4100 }
4101 
4102 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4103     CodeGenModule &CGM, StringRef ParentName,
4104     const OMPTargetParallelDirective &S) {
4105   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4106     emitTargetParallelRegion(CGF, S, Action);
4107   };
4108   llvm::Function *Fn;
4109   llvm::Constant *Addr;
4110   // Emit target region as a standalone region.
4111   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4112       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4113   assert(Fn && Addr && "Target device function emission failed.");
4114 }
4115 
4116 void CodeGenFunction::EmitOMPTargetParallelDirective(
4117     const OMPTargetParallelDirective &S) {
4118   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4119     emitTargetParallelRegion(CGF, S, Action);
4120   };
4121   emitCommonOMPTargetDirective(*this, S, CodeGen);
4122 }
4123 
void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  // Intentionally a no-op for now: no IR is emitted for this directive.
  // TODO: codegen for target parallel for.
}
4128 
4129 /// Emit a helper variable and return corresponding lvalue.
4130 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
4131                      const ImplicitParamDecl *PVD,
4132                      CodeGenFunction::OMPPrivateScope &Privates) {
4133   auto *VDecl = cast<VarDecl>(Helper->getDecl());
4134   Privates.addPrivate(
4135       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
4136 }
4137 
/// Emit code for 'taskloop'-based directives ('taskloop', 'taskloop simd').
/// The loop body is packaged as an outlined task function; the runtime call
/// that spawns the tasks is emitted through the TaskGen callback below.
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Find the 'if' clause that applies to 'taskloop': either one with no name
  // modifier or one explicitly naming 'taskloop'.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop. Schedule is a pointer/int pair: the int
  // flag distinguishes num_tasks (true) from grainsize (false), the pointer
  // holds the evaluated clause expression.
  if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  // BodyGen emits the loop nest that runs inside each generated task.
  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // 'taskloop simd' additionally applies simd loop metadata/hints.
    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    // The loop-bound helper variables live at fixed parameter positions of
    // the outlined task function, starting at index 5 (the earlier
    // parameters are presumably reserved for the task interface — see the
    // task outlining code; TODO confirm).
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Emit the actual loop over the iteration space chunk owned by this task.
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  // TaskGen receives the outlined function and emits the runtime call that
  // actually creates the taskloop, inlined in an OMPD_taskloop region.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}
4265 
// Generate the instructions for '#pragma omp taskloop' directive; shares
// codegen with 'taskloop simd' via EmitOMPTaskLoopBasedDirective.
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}
4269 
// Generate the instructions for '#pragma omp taskloop simd' directive; the
// simd-specific handling happens inside EmitOMPTaskLoopBasedDirective.
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}
4274 
4275 // Generate the instructions for '#pragma omp target update' directive.
4276 void CodeGenFunction::EmitOMPTargetUpdateDirective(
4277     const OMPTargetUpdateDirective &S) {
4278   // If we don't have target devices, don't bother emitting the data mapping
4279   // code.
4280   if (CGM.getLangOpts().OMPTargetTriples.empty())
4281     return;
4282 
4283   // Check if we have any if clause associated with the directive.
4284   const Expr *IfCond = nullptr;
4285   if (auto *C = S.getSingleClause<OMPIfClause>())
4286     IfCond = C->getCondition();
4287 
4288   // Check if we have any device clause associated with the directive.
4289   const Expr *Device = nullptr;
4290   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4291     Device = C->getDevice();
4292 
4293   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4294 }
4295