1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
28 /// for captured expressions.
29 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
30   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
31     for (const auto *C : S.clauses()) {
32       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34           for (const auto *I : PreInit->decls()) {
35             if (!I->hasAttr<OMPCaptureNoInitAttr>())
36               CGF.EmitVarDecl(cast<VarDecl>(*I));
37             else {
38               CodeGenFunction::AutoVarEmission Emission =
39                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40               CGF.EmitAutoVarCleanups(Emission);
41             }
42           }
43         }
44       }
45     }
46   }
47   CodeGenFunction::OMPPrivateScope InlinedShareds;
48 
49   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
50     return CGF.LambdaCaptureFields.lookup(VD) ||
51            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
52            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
53   }
54 
55 public:
56   OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
57                   bool AsInlined = false, bool EmitPreInitStmt = true)
58       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
59         InlinedShareds(CGF) {
60     if (EmitPreInitStmt)
61       emitPreInitStmt(CGF, S);
62     if (AsInlined) {
63       if (S.hasAssociatedStmt()) {
64         auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
65         for (auto &C : CS->captures()) {
66           if (C.capturesVariable() || C.capturesVariableByCopy()) {
67             auto *VD = C.getCapturedVar();
68             assert(VD == VD->getCanonicalDecl() &&
69                         "Canonical decl must be captured.");
70             DeclRefExpr DRE(const_cast<VarDecl *>(VD),
71                             isCapturedVar(CGF, VD) ||
72                                 (CGF.CapturedStmtInfo &&
73                                  InlinedShareds.isGlobalVarCaptured(VD)),
74                             VD->getType().getNonReferenceType(), VK_LValue,
75                             SourceLocation());
76             InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
77               return CGF.EmitLValue(&DRE).getAddress();
78             });
79           }
80         }
81         (void)InlinedShareds.Privatize();
82       }
83     }
84   }
85 };
86 
87 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
88 /// for captured expressions.
89 class OMPParallelScope final : public OMPLexicalScope {
90   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
91     OpenMPDirectiveKind Kind = S.getDirectiveKind();
92     return !(isOpenMPTargetExecutionDirective(Kind) ||
93              isOpenMPLoopBoundSharingDirective(Kind)) &&
94            isOpenMPParallelDirective(Kind);
95   }
96 
97 public:
98   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
99       : OMPLexicalScope(CGF, S,
100                         /*AsInlined=*/false,
101                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
102 };
103 
104 /// Lexical scope for OpenMP teams construct, that handles correct codegen
105 /// for captured expressions.
106 class OMPTeamsScope final : public OMPLexicalScope {
107   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
108     OpenMPDirectiveKind Kind = S.getDirectiveKind();
109     return !isOpenMPTargetExecutionDirective(Kind) &&
110            isOpenMPTeamsDirective(Kind);
111   }
112 
113 public:
114   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
115       : OMPLexicalScope(CGF, S,
116                         /*AsInlined=*/false,
117                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
118 };
119 
120 /// Private scope for OpenMP loop-based directives, that supports capturing
121 /// of used expression from loop statement.
122 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
123   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
124     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
125       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
126         for (const auto *I : PreInits->decls())
127           CGF.EmitVarDecl(cast<VarDecl>(*I));
128       }
129     }
130   }
131 
132 public:
133   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
134       : CodeGenFunction::RunCleanupsScope(CGF) {
135     emitPreInitStmt(CGF, S);
136   }
137 };
138 
139 } // namespace
140 
141 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
142   if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
143     if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
144       OrigVD = OrigVD->getCanonicalDecl();
145       bool IsCaptured =
146           LambdaCaptureFields.lookup(OrigVD) ||
147           (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
148           (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
149       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured,
150                       OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
151       return EmitLValue(&DRE);
152     }
153   }
154   return EmitLValue(E);
155 }
156 
157 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
158   auto &C = getContext();
159   llvm::Value *Size = nullptr;
160   auto SizeInChars = C.getTypeSizeInChars(Ty);
161   if (SizeInChars.isZero()) {
162     // getTypeSizeInChars() returns 0 for a VLA.
163     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
164       llvm::Value *ArraySize;
165       std::tie(ArraySize, Ty) = getVLASize(VAT);
166       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
167     }
168     SizeInChars = C.getTypeSizeInChars(Ty);
169     if (SizeInChars.isZero())
170       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
171     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
172   } else
173     Size = CGM.getSize(SizeInChars);
174   return Size;
175 }
176 
177 void CodeGenFunction::GenerateOpenMPCapturedVars(
178     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
179   const RecordDecl *RD = S.getCapturedRecordDecl();
180   auto CurField = RD->field_begin();
181   auto CurCap = S.captures().begin();
182   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
183                                                  E = S.capture_init_end();
184        I != E; ++I, ++CurField, ++CurCap) {
185     if (CurField->hasCapturedVLAType()) {
186       auto VAT = CurField->getCapturedVLAType();
187       auto *Val = VLASizeMap[VAT->getSizeExpr()];
188       CapturedVars.push_back(Val);
189     } else if (CurCap->capturesThis())
190       CapturedVars.push_back(CXXThisValue);
191     else if (CurCap->capturesVariableByCopy()) {
192       llvm::Value *CV =
193           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();
194 
195       // If the field is not a pointer, we need to save the actual value
196       // and load it as a void pointer.
197       if (!CurField->getType()->isAnyPointerType()) {
198         auto &Ctx = getContext();
199         auto DstAddr = CreateMemTemp(
200             Ctx.getUIntPtrType(),
201             Twine(CurCap->getCapturedVar()->getName()) + ".casted");
202         LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
203 
204         auto *SrcAddrVal = EmitScalarConversion(
205             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
206             Ctx.getPointerType(CurField->getType()), SourceLocation());
207         LValue SrcLV =
208             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
209 
210         // Store the value using the source type pointer.
211         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
212 
213         // Load the value using the destination type pointer.
214         CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
215       }
216       CapturedVars.push_back(CV);
217     } else {
218       assert(CurCap->capturesVariable() && "Expected capture by reference.");
219       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
220     }
221   }
222 }
223 
224 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
225                                     StringRef Name, LValue AddrLV,
226                                     bool isReferenceType = false) {
227   ASTContext &Ctx = CGF.getContext();
228 
229   auto *CastedPtr = CGF.EmitScalarConversion(
230       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
231       Ctx.getPointerType(DstType), SourceLocation());
232   auto TmpAddr =
233       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
234           .getAddress();
235 
236   // If we are dealing with references we need to return the address of the
237   // reference instead of the reference of the value.
238   if (isReferenceType) {
239     QualType RefType = Ctx.getLValueReferenceType(DstType);
240     auto *RefVal = TmpAddr.getPointer();
241     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
242     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
243     CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
244   }
245 
246   return TmpAddr;
247 }
248 
249 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
250   if (T->isLValueReferenceType()) {
251     return C.getLValueReferenceType(
252         getCanonicalParamType(C, T.getNonReferenceType()),
253         /*SpelledAsLValue=*/false);
254   }
255   if (T->isPointerType())
256     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
257   if (auto *A = T->getAsArrayTypeUnsafe()) {
258     if (auto *VLA = dyn_cast<VariableArrayType>(A))
259       return getCanonicalParamType(C, VLA->getElementType());
260     else if (!A->isVariablyModifiedType())
261       return C.getCanonicalType(T);
262   }
263   return C.getCanonicalParamType(T);
264 }
265 
266 namespace {
267   /// Contains required data for proper outlined function codegen.
268   struct FunctionOptions {
269     /// Captured statement for which the function is generated.
270     const CapturedStmt *S = nullptr;
271     /// true if cast to/from  UIntPtr is required for variables captured by
272     /// value.
273     const bool UIntPtrCastRequired = true;
274     /// true if only casted arguments must be registered as local args or VLA
275     /// sizes.
276     const bool RegisterCastedArgsOnly = false;
277     /// Name of the generated function.
278     const StringRef FunctionName;
279     explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
280                              bool RegisterCastedArgsOnly,
281                              StringRef FunctionName)
282         : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
283           RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
284           FunctionName(FunctionName) {}
285   };
286 }
287 
288 static llvm::Function *emitOutlinedFunctionPrologue(
289     CodeGenFunction &CGF, FunctionArgList &Args,
290     llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
291         &LocalAddrs,
292     llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
293         &VLASizes,
294     llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
295   const CapturedDecl *CD = FO.S->getCapturedDecl();
296   const RecordDecl *RD = FO.S->getCapturedRecordDecl();
297   assert(CD->hasBody() && "missing CapturedDecl body");
298 
299   CXXThisValue = nullptr;
300   // Build the argument list.
301   CodeGenModule &CGM = CGF.CGM;
302   ASTContext &Ctx = CGM.getContext();
303   FunctionArgList TargetArgs;
304   Args.append(CD->param_begin(),
305               std::next(CD->param_begin(), CD->getContextParamPosition()));
306   TargetArgs.append(
307       CD->param_begin(),
308       std::next(CD->param_begin(), CD->getContextParamPosition()));
309   auto I = FO.S->captures().begin();
310   for (auto *FD : RD->fields()) {
311     QualType ArgType = FD->getType();
312     IdentifierInfo *II = nullptr;
313     VarDecl *CapVar = nullptr;
314 
315     // If this is a capture by copy and the type is not a pointer, the outlined
316     // function argument type should be uintptr and the value properly casted to
317     // uintptr. This is necessary given that the runtime library is only able to
318     // deal with pointers. We can pass in the same way the VLA type sizes to the
319     // outlined function.
320     if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
321         I->capturesVariableArrayType()) {
322       if (FO.UIntPtrCastRequired)
323         ArgType = Ctx.getUIntPtrType();
324     }
325 
326     if (I->capturesVariable() || I->capturesVariableByCopy()) {
327       CapVar = I->getCapturedVar();
328       II = CapVar->getIdentifier();
329     } else if (I->capturesThis())
330       II = &Ctx.Idents.get("this");
331     else {
332       assert(I->capturesVariableArrayType());
333       II = &Ctx.Idents.get("vla");
334     }
335     if (ArgType->isVariablyModifiedType())
336       ArgType = getCanonicalParamType(Ctx, ArgType);
337     auto *Arg =
338         ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II,
339                                   ArgType, ImplicitParamDecl::Other);
340     Args.emplace_back(Arg);
341     // Do not cast arguments if we emit function with non-original types.
342     TargetArgs.emplace_back(
343         FO.UIntPtrCastRequired
344             ? Arg
345             : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
346     ++I;
347   }
348   Args.append(
349       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
350       CD->param_end());
351   TargetArgs.append(
352       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
353       CD->param_end());
354 
355   // Create the function declaration.
356   FunctionType::ExtInfo ExtInfo;
357   const CGFunctionInfo &FuncInfo =
358       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
359   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
360 
361   llvm::Function *F =
362       llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
363                              FO.FunctionName, &CGM.getModule());
364   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
365   if (CD->isNothrow())
366     F->setDoesNotThrow();
367 
368   // Generate the function.
369   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
370                     FO.S->getLocStart(), CD->getBody()->getLocStart());
371   unsigned Cnt = CD->getContextParamPosition();
372   I = FO.S->captures().begin();
373   for (auto *FD : RD->fields()) {
374     // Do not map arguments if we emit function with non-original types.
375     Address LocalAddr(Address::invalid());
376     if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
377       LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
378                                                              TargetArgs[Cnt]);
379     } else {
380       LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
381     }
382     // If we are capturing a pointer by copy we don't need to do anything, just
383     // use the value that we get from the arguments.
384     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
385       const VarDecl *CurVD = I->getCapturedVar();
386       // If the variable is a reference we need to materialize it here.
387       if (CurVD->getType()->isReferenceType()) {
388         Address RefAddr = CGF.CreateMemTemp(
389             CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
390         CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
391                               /*Volatile=*/false, CurVD->getType());
392         LocalAddr = RefAddr;
393       }
394       if (!FO.RegisterCastedArgsOnly)
395         LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
396       ++Cnt;
397       ++I;
398       continue;
399     }
400 
401     LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
402                                         AlignmentSource::Decl);
403     if (FD->hasCapturedVLAType()) {
404       if (FO.UIntPtrCastRequired) {
405         ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
406                                                           Args[Cnt]->getName(),
407                                                           ArgLVal),
408                                      FD->getType(), AlignmentSource::Decl);
409       }
410       auto *ExprArg =
411           CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
412       auto VAT = FD->getCapturedVLAType();
413       VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
414     } else if (I->capturesVariable()) {
415       auto *Var = I->getCapturedVar();
416       QualType VarTy = Var->getType();
417       Address ArgAddr = ArgLVal.getAddress();
418       if (!VarTy->isReferenceType()) {
419         if (ArgLVal.getType()->isLValueReferenceType()) {
420           ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
421         } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
422           assert(ArgLVal.getType()->isPointerType());
423           ArgAddr = CGF.EmitLoadOfPointer(
424               ArgAddr, ArgLVal.getType()->castAs<PointerType>());
425         }
426       }
427       if (!FO.RegisterCastedArgsOnly) {
428         LocalAddrs.insert(
429             {Args[Cnt],
430              {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
431       }
432     } else if (I->capturesVariableByCopy()) {
433       assert(!FD->getType()->isAnyPointerType() &&
434              "Not expecting a captured pointer.");
435       auto *Var = I->getCapturedVar();
436       QualType VarTy = Var->getType();
437       LocalAddrs.insert(
438           {Args[Cnt],
439            {Var,
440             FO.UIntPtrCastRequired
441                 ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
442                                        ArgLVal, VarTy->isReferenceType())
443                 : ArgLVal.getAddress()}});
444     } else {
445       // If 'this' is captured, load it into CXXThisValue.
446       assert(I->capturesThis());
447       CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
448                          .getScalarVal();
449       LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
450     }
451     ++Cnt;
452     ++I;
453   }
454 
455   return F;
456 }
457 
458 llvm::Function *
459 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
460   assert(
461       CapturedStmtInfo &&
462       "CapturedStmtInfo should be set when generating the captured function");
463   const CapturedDecl *CD = S.getCapturedDecl();
464   // Build the argument list.
465   bool NeedWrapperFunction =
466       getDebugInfo() &&
467       CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
468   FunctionArgList Args;
469   llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
470   llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
471   SmallString<256> Buffer;
472   llvm::raw_svector_ostream Out(Buffer);
473   Out << CapturedStmtInfo->getHelperName();
474   if (NeedWrapperFunction)
475     Out << "_debug__";
476   FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
477                      Out.str());
478   llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
479                                                    VLASizes, CXXThisValue, FO);
480   for (const auto &LocalAddrPair : LocalAddrs) {
481     if (LocalAddrPair.second.first) {
482       setAddrOfLocalVar(LocalAddrPair.second.first,
483                         LocalAddrPair.second.second);
484     }
485   }
486   for (const auto &VLASizePair : VLASizes)
487     VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
488   PGO.assignRegionCounters(GlobalDecl(CD), F);
489   CapturedStmtInfo->EmitBody(*this, CD->getBody());
490   FinishFunction(CD->getBodyRBrace());
491   if (!NeedWrapperFunction)
492     return F;
493 
494   FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
495                             /*RegisterCastedArgsOnly=*/true,
496                             CapturedStmtInfo->getHelperName());
497   CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
498   Args.clear();
499   LocalAddrs.clear();
500   VLASizes.clear();
501   llvm::Function *WrapperF =
502       emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
503                                    WrapperCGF.CXXThisValue, WrapperFO);
504   llvm::SmallVector<llvm::Value *, 4> CallArgs;
505   for (const auto *Arg : Args) {
506     llvm::Value *CallArg;
507     auto I = LocalAddrs.find(Arg);
508     if (I != LocalAddrs.end()) {
509       LValue LV = WrapperCGF.MakeAddrLValue(
510           I->second.second,
511           I->second.first ? I->second.first->getType() : Arg->getType(),
512           AlignmentSource::Decl);
513       CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
514     } else {
515       auto EI = VLASizes.find(Arg);
516       if (EI != VLASizes.end())
517         CallArg = EI->second.second;
518       else {
519         LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
520                                               Arg->getType(),
521                                               AlignmentSource::Decl);
522         CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
523       }
524     }
525     CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
526   }
527   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
528                                                   F, CallArgs);
529   WrapperCGF.FinishFunction();
530   return WrapperF;
531 }
532 
533 //===----------------------------------------------------------------------===//
534 //                              OpenMP Directive Emission
535 //===----------------------------------------------------------------------===//
536 void CodeGenFunction::EmitOMPAggregateAssign(
537     Address DestAddr, Address SrcAddr, QualType OriginalType,
538     const llvm::function_ref<void(Address, Address)> &CopyGen) {
539   // Perform element-by-element initialization.
540   QualType ElementTy;
541 
542   // Drill down to the base element type on both arrays.
543   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
544   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
545   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
546 
547   auto SrcBegin = SrcAddr.getPointer();
548   auto DestBegin = DestAddr.getPointer();
549   // Cast from pointer to array type to pointer to single element.
550   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
551   // The basic structure here is a while-do loop.
552   auto BodyBB = createBasicBlock("omp.arraycpy.body");
553   auto DoneBB = createBasicBlock("omp.arraycpy.done");
554   auto IsEmpty =
555       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
556   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
557 
558   // Enter the loop body, making that address the current address.
559   auto EntryBB = Builder.GetInsertBlock();
560   EmitBlock(BodyBB);
561 
562   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
563 
564   llvm::PHINode *SrcElementPHI =
565     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
566   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
567   Address SrcElementCurrent =
568       Address(SrcElementPHI,
569               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
570 
571   llvm::PHINode *DestElementPHI =
572     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
573   DestElementPHI->addIncoming(DestBegin, EntryBB);
574   Address DestElementCurrent =
575     Address(DestElementPHI,
576             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
577 
578   // Emit copy.
579   CopyGen(DestElementCurrent, SrcElementCurrent);
580 
581   // Shift the address forward by one element.
582   auto DestElementNext = Builder.CreateConstGEP1_32(
583       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
584   auto SrcElementNext = Builder.CreateConstGEP1_32(
585       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
586   // Check whether we've reached the end.
587   auto Done =
588       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
589   Builder.CreateCondBr(Done, DoneBB, BodyBB);
590   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
591   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
592 
593   // Done.
594   EmitBlock(DoneBB, /*IsFinished=*/true);
595 }
596 
597 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
598                                   Address SrcAddr, const VarDecl *DestVD,
599                                   const VarDecl *SrcVD, const Expr *Copy) {
600   if (OriginalType->isArrayType()) {
601     auto *BO = dyn_cast<BinaryOperator>(Copy);
602     if (BO && BO->getOpcode() == BO_Assign) {
603       // Perform simple memcpy for simple copying.
604       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
605     } else {
606       // For arrays with complex element types perform element by element
607       // copying.
608       EmitOMPAggregateAssign(
609           DestAddr, SrcAddr, OriginalType,
610           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
611             // Working with the single array element, so have to remap
612             // destination and source variables to corresponding array
613             // elements.
614             CodeGenFunction::OMPPrivateScope Remap(*this);
615             Remap.addPrivate(DestVD, [DestElement]() -> Address {
616               return DestElement;
617             });
618             Remap.addPrivate(
619                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
620             (void)Remap.Privatize();
621             EmitIgnoredExpr(Copy);
622           });
623     }
624   } else {
625     // Remap pseudo source variable to private copy.
626     CodeGenFunction::OMPPrivateScope Remap(*this);
627     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
628     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
629     (void)Remap.Privatize();
630     // Emit copying of the whole variable.
631     EmitIgnoredExpr(Copy);
632   }
633 }
634 
635 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
636                                                 OMPPrivateScope &PrivateScope) {
637   if (!HaveInsertPoint())
638     return false;
639   bool FirstprivateIsLastprivate = false;
640   llvm::DenseSet<const VarDecl *> Lastprivates;
641   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
642     for (const auto *D : C->varlists())
643       Lastprivates.insert(
644           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
645   }
646   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
647   CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
648   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
649     auto IRef = C->varlist_begin();
650     auto InitsRef = C->inits().begin();
651     for (auto IInit : C->private_copies()) {
652       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
653       bool ThisFirstprivateIsLastprivate =
654           Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
655       auto *CapFD = CapturesInfo.lookup(OrigVD);
656       auto *FD = CapturedStmtInfo->lookup(OrigVD);
657       if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
658           !FD->getType()->isReferenceType()) {
659         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
660         ++IRef;
661         ++InitsRef;
662         continue;
663       }
664       FirstprivateIsLastprivate =
665           FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
666       if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
667         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
668         auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
669         bool IsRegistered;
670         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
671                         /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
672                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
673         Address OriginalAddr = EmitLValue(&DRE).getAddress();
674         QualType Type = VD->getType();
675         if (Type->isArrayType()) {
676           // Emit VarDecl with copy init for arrays.
677           // Get the address of the original variable captured in current
678           // captured region.
679           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
680             auto Emission = EmitAutoVarAlloca(*VD);
681             auto *Init = VD->getInit();
682             if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
683               // Perform simple memcpy.
684               EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
685                                   Type);
686             } else {
687               EmitOMPAggregateAssign(
688                   Emission.getAllocatedAddress(), OriginalAddr, Type,
689                   [this, VDInit, Init](Address DestElement,
690                                        Address SrcElement) {
691                     // Clean up any temporaries needed by the initialization.
692                     RunCleanupsScope InitScope(*this);
693                     // Emit initialization for single element.
694                     setAddrOfLocalVar(VDInit, SrcElement);
695                     EmitAnyExprToMem(Init, DestElement,
696                                      Init->getType().getQualifiers(),
697                                      /*IsInitializer*/ false);
698                     LocalDeclMap.erase(VDInit);
699                   });
700             }
701             EmitAutoVarCleanups(Emission);
702             return Emission.getAllocatedAddress();
703           });
704         } else {
705           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
706             // Emit private VarDecl with copy init.
707             // Remap temp VDInit variable to the address of the original
708             // variable
709             // (for proper handling of captured global variables).
710             setAddrOfLocalVar(VDInit, OriginalAddr);
711             EmitDecl(*VD);
712             LocalDeclMap.erase(VDInit);
713             return GetAddrOfLocalVar(VD);
714           });
715         }
716         assert(IsRegistered &&
717                "firstprivate var already registered as private");
718         // Silence the warning about unused variable.
719         (void)IsRegistered;
720       }
721       ++IRef;
722       ++InitsRef;
723     }
724   }
725   return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
726 }
727 
728 void CodeGenFunction::EmitOMPPrivateClause(
729     const OMPExecutableDirective &D,
730     CodeGenFunction::OMPPrivateScope &PrivateScope) {
731   if (!HaveInsertPoint())
732     return;
733   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
734   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
735     auto IRef = C->varlist_begin();
736     for (auto IInit : C->private_copies()) {
737       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
738       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
739         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
740         bool IsRegistered =
741             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
742               // Emit private VarDecl with copy init.
743               EmitDecl(*VD);
744               return GetAddrOfLocalVar(VD);
745             });
746         assert(IsRegistered && "private var already registered as private");
747         // Silence the warning about unused variable.
748         (void)IsRegistered;
749       }
750       ++IRef;
751     }
752   }
753 }
754 
755 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
756   if (!HaveInsertPoint())
757     return false;
758   // threadprivate_var1 = master_threadprivate_var1;
759   // operator=(threadprivate_var2, master_threadprivate_var2);
760   // ...
761   // __kmpc_barrier(&loc, global_tid);
762   llvm::DenseSet<const VarDecl *> CopiedVars;
763   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
764   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
765     auto IRef = C->varlist_begin();
766     auto ISrcRef = C->source_exprs().begin();
767     auto IDestRef = C->destination_exprs().begin();
768     for (auto *AssignOp : C->assignment_ops()) {
769       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
770       QualType Type = VD->getType();
771       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
772         // Get the address of the master variable. If we are emitting code with
773         // TLS support, the address is passed from the master as field in the
774         // captured declaration.
775         Address MasterAddr = Address::invalid();
776         if (getLangOpts().OpenMPUseTLS &&
777             getContext().getTargetInfo().isTLSSupported()) {
778           assert(CapturedStmtInfo->lookup(VD) &&
779                  "Copyin threadprivates should have been captured!");
780           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
781                           VK_LValue, (*IRef)->getExprLoc());
782           MasterAddr = EmitLValue(&DRE).getAddress();
783           LocalDeclMap.erase(VD);
784         } else {
785           MasterAddr =
786             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
787                                         : CGM.GetAddrOfGlobal(VD),
788                     getContext().getDeclAlign(VD));
789         }
790         // Get the address of the threadprivate variable.
791         Address PrivateAddr = EmitLValue(*IRef).getAddress();
792         if (CopiedVars.size() == 1) {
793           // At first check if current thread is a master thread. If it is, no
794           // need to copy data.
795           CopyBegin = createBasicBlock("copyin.not.master");
796           CopyEnd = createBasicBlock("copyin.not.master.end");
797           Builder.CreateCondBr(
798               Builder.CreateICmpNE(
799                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
800                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
801               CopyBegin, CopyEnd);
802           EmitBlock(CopyBegin);
803         }
804         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
805         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
806         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
807       }
808       ++IRef;
809       ++ISrcRef;
810       ++IDestRef;
811     }
812   }
813   if (CopyEnd) {
814     // Exit out of copying procedure for non-master thread.
815     EmitBlock(CopyEnd, /*IsFinished=*/true);
816     return true;
817   }
818   return false;
819 }
820 
821 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
822     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
823   if (!HaveInsertPoint())
824     return false;
825   bool HasAtLeastOneLastprivate = false;
826   llvm::DenseSet<const VarDecl *> SIMDLCVs;
827   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
828     auto *LoopDirective = cast<OMPLoopDirective>(&D);
829     for (auto *C : LoopDirective->counters()) {
830       SIMDLCVs.insert(
831           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
832     }
833   }
834   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
835   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
836     HasAtLeastOneLastprivate = true;
837     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
838       break;
839     auto IRef = C->varlist_begin();
840     auto IDestRef = C->destination_exprs().begin();
841     for (auto *IInit : C->private_copies()) {
842       // Keep the address of the original variable for future update at the end
843       // of the loop.
844       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
845       // Taskloops do not require additional initialization, it is done in
846       // runtime support library.
847       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
848         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
849         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
850           DeclRefExpr DRE(
851               const_cast<VarDecl *>(OrigVD),
852               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
853                   OrigVD) != nullptr,
854               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
855           return EmitLValue(&DRE).getAddress();
856         });
857         // Check if the variable is also a firstprivate: in this case IInit is
858         // not generated. Initialization of this variable will happen in codegen
859         // for 'firstprivate' clause.
860         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
861           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
862           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
863             // Emit private VarDecl with copy init.
864             EmitDecl(*VD);
865             return GetAddrOfLocalVar(VD);
866           });
867           assert(IsRegistered &&
868                  "lastprivate var already registered as private");
869           (void)IsRegistered;
870         }
871       }
872       ++IRef;
873       ++IDestRef;
874     }
875   }
876   return HasAtLeastOneLastprivate;
877 }
878 
879 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
880     const OMPExecutableDirective &D, bool NoFinals,
881     llvm::Value *IsLastIterCond) {
882   if (!HaveInsertPoint())
883     return;
884   // Emit following code:
885   // if (<IsLastIterCond>) {
886   //   orig_var1 = private_orig_var1;
887   //   ...
888   //   orig_varn = private_orig_varn;
889   // }
890   llvm::BasicBlock *ThenBB = nullptr;
891   llvm::BasicBlock *DoneBB = nullptr;
892   if (IsLastIterCond) {
893     ThenBB = createBasicBlock(".omp.lastprivate.then");
894     DoneBB = createBasicBlock(".omp.lastprivate.done");
895     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
896     EmitBlock(ThenBB);
897   }
898   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
899   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
900   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
901     auto IC = LoopDirective->counters().begin();
902     for (auto F : LoopDirective->finals()) {
903       auto *D =
904           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
905       if (NoFinals)
906         AlreadyEmittedVars.insert(D);
907       else
908         LoopCountersAndUpdates[D] = F;
909       ++IC;
910     }
911   }
912   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
913     auto IRef = C->varlist_begin();
914     auto ISrcRef = C->source_exprs().begin();
915     auto IDestRef = C->destination_exprs().begin();
916     for (auto *AssignOp : C->assignment_ops()) {
917       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
918       QualType Type = PrivateVD->getType();
919       auto *CanonicalVD = PrivateVD->getCanonicalDecl();
920       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
921         // If lastprivate variable is a loop control variable for loop-based
922         // directive, update its value before copyin back to original
923         // variable.
924         if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
925           EmitIgnoredExpr(FinalExpr);
926         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
927         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
928         // Get the address of the original variable.
929         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
930         // Get the address of the private variable.
931         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
932         if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
933           PrivateAddr =
934               Address(Builder.CreateLoad(PrivateAddr),
935                       getNaturalTypeAlignment(RefTy->getPointeeType()));
936         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
937       }
938       ++IRef;
939       ++ISrcRef;
940       ++IDestRef;
941     }
942     if (auto *PostUpdate = C->getPostUpdateExpr())
943       EmitIgnoredExpr(PostUpdate);
944   }
945   if (IsLastIterCond)
946     EmitBlock(DoneBB, /*IsFinished=*/true);
947 }
948 
949 void CodeGenFunction::EmitOMPReductionClauseInit(
950     const OMPExecutableDirective &D,
951     CodeGenFunction::OMPPrivateScope &PrivateScope) {
952   if (!HaveInsertPoint())
953     return;
954   SmallVector<const Expr *, 4> Shareds;
955   SmallVector<const Expr *, 4> Privates;
956   SmallVector<const Expr *, 4> ReductionOps;
957   SmallVector<const Expr *, 4> LHSs;
958   SmallVector<const Expr *, 4> RHSs;
959   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
960     auto IPriv = C->privates().begin();
961     auto IRed = C->reduction_ops().begin();
962     auto ILHS = C->lhs_exprs().begin();
963     auto IRHS = C->rhs_exprs().begin();
964     for (const auto *Ref : C->varlists()) {
965       Shareds.emplace_back(Ref);
966       Privates.emplace_back(*IPriv);
967       ReductionOps.emplace_back(*IRed);
968       LHSs.emplace_back(*ILHS);
969       RHSs.emplace_back(*IRHS);
970       std::advance(IPriv, 1);
971       std::advance(IRed, 1);
972       std::advance(ILHS, 1);
973       std::advance(IRHS, 1);
974     }
975   }
976   ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
977   unsigned Count = 0;
978   auto ILHS = LHSs.begin();
979   auto IRHS = RHSs.begin();
980   auto IPriv = Privates.begin();
981   for (const auto *IRef : Shareds) {
982     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
983     // Emit private VarDecl with reduction init.
984     RedCG.emitSharedLValue(*this, Count);
985     RedCG.emitAggregateType(*this, Count);
986     auto Emission = EmitAutoVarAlloca(*PrivateVD);
987     RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
988                              RedCG.getSharedLValue(Count),
989                              [&Emission](CodeGenFunction &CGF) {
990                                CGF.EmitAutoVarInit(Emission);
991                                return true;
992                              });
993     EmitAutoVarCleanups(Emission);
994     Address BaseAddr = RedCG.adjustPrivateAddress(
995         *this, Count, Emission.getAllocatedAddress());
996     bool IsRegistered = PrivateScope.addPrivate(
997         RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
998     assert(IsRegistered && "private var already registered as private");
999     // Silence the warning about unused variable.
1000     (void)IsRegistered;
1001 
1002     auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1003     auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1004     QualType Type = PrivateVD->getType();
1005     bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1006     if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1007       // Store the address of the original variable associated with the LHS
1008       // implicit variable.
1009       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
1010         return RedCG.getSharedLValue(Count).getAddress();
1011       });
1012       PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
1013         return GetAddrOfLocalVar(PrivateVD);
1014       });
1015     } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1016                isa<ArraySubscriptExpr>(IRef)) {
1017       // Store the address of the original variable associated with the LHS
1018       // implicit variable.
1019       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
1020         return RedCG.getSharedLValue(Count).getAddress();
1021       });
1022       PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
1023         return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
1024                                             ConvertTypeForMem(RHSVD->getType()),
1025                                             "rhs.begin");
1026       });
1027     } else {
1028       QualType Type = PrivateVD->getType();
1029       bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1030       Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
1031       // Store the address of the original variable associated with the LHS
1032       // implicit variable.
1033       if (IsArray) {
1034         OriginalAddr = Builder.CreateElementBitCast(
1035             OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1036       }
1037       PrivateScope.addPrivate(
1038           LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
1039       PrivateScope.addPrivate(
1040           RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
1041             return IsArray
1042                        ? Builder.CreateElementBitCast(
1043                              GetAddrOfLocalVar(PrivateVD),
1044                              ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
1045                        : GetAddrOfLocalVar(PrivateVD);
1046           });
1047     }
1048     ++ILHS;
1049     ++IRHS;
1050     ++IPriv;
1051     ++Count;
1052   }
1053 }
1054 
1055 void CodeGenFunction::EmitOMPReductionClauseFinal(
1056     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1057   if (!HaveInsertPoint())
1058     return;
1059   llvm::SmallVector<const Expr *, 8> Privates;
1060   llvm::SmallVector<const Expr *, 8> LHSExprs;
1061   llvm::SmallVector<const Expr *, 8> RHSExprs;
1062   llvm::SmallVector<const Expr *, 8> ReductionOps;
1063   bool HasAtLeastOneReduction = false;
1064   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1065     HasAtLeastOneReduction = true;
1066     Privates.append(C->privates().begin(), C->privates().end());
1067     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1068     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1069     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1070   }
1071   if (HasAtLeastOneReduction) {
1072     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1073                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1074                       D.getDirectiveKind() == OMPD_simd;
1075     bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
1076     // Emit nowait reduction if nowait clause is present or directive is a
1077     // parallel directive (it always has implicit barrier).
1078     CGM.getOpenMPRuntime().emitReduction(
1079         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1080         {WithNowait, SimpleReduction, ReductionKind});
1081   }
1082 }
1083 
1084 static void emitPostUpdateForReductionClause(
1085     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1086     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1087   if (!CGF.HaveInsertPoint())
1088     return;
1089   llvm::BasicBlock *DoneBB = nullptr;
1090   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1091     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1092       if (!DoneBB) {
1093         if (auto *Cond = CondGen(CGF)) {
1094           // If the first post-update expression is found, emit conditional
1095           // block if it was requested.
1096           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1097           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1098           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1099           CGF.EmitBlock(ThenBB);
1100         }
1101       }
1102       CGF.EmitIgnoredExpr(PostUpdate);
1103     }
1104   }
1105   if (DoneBB)
1106     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1107 }
1108 
1109 namespace {
1110 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1111 /// parallel function. This is necessary for combined constructs such as
1112 /// 'distribute parallel for'
1113 typedef llvm::function_ref<void(CodeGenFunction &,
1114                                 const OMPExecutableDirective &,
1115                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1116     CodeGenBoundParametersTy;
1117 } // anonymous namespace
1118 
1119 static void emitCommonOMPParallelDirective(
1120     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1121     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1122     const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1123   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1124   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1125       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1126   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1127     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1128     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1129                                          /*IgnoreResultAssign*/ true);
1130     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1131         CGF, NumThreads, NumThreadsClause->getLocStart());
1132   }
1133   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1134     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1135     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1136         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
1137   }
1138   const Expr *IfCond = nullptr;
1139   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1140     if (C->getNameModifier() == OMPD_unknown ||
1141         C->getNameModifier() == OMPD_parallel) {
1142       IfCond = C->getCondition();
1143       break;
1144     }
1145   }
1146 
1147   OMPParallelScope Scope(CGF, S);
1148   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1149   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1150   // lower and upper bounds with the pragma 'for' chunking mechanism.
1151   // The following lambda takes care of appending the lower and upper bound
1152   // parameters when necessary
1153   CodeGenBoundParameters(CGF, S, CapturedVars);
1154   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1155   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1156                                               CapturedVars, IfCond);
1157 }
1158 
1159 static void emitEmptyBoundParameters(CodeGenFunction &,
1160                                      const OMPExecutableDirective &,
1161                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
1162 
1163 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1164   // Emit parallel region as a standalone region.
1165   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1166     OMPPrivateScope PrivateScope(CGF);
1167     bool Copyins = CGF.EmitOMPCopyinClause(S);
1168     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1169     if (Copyins) {
1170       // Emit implicit barrier to synchronize threads and avoid data races on
1171       // propagation master's thread values of threadprivate variables to local
1172       // instances of that variables of all other implicit threads.
1173       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1174           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1175           /*ForceSimpleCall=*/true);
1176     }
1177     CGF.EmitOMPPrivateClause(S, PrivateScope);
1178     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1179     (void)PrivateScope.Privatize();
1180     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1181     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1182   };
1183   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1184                                  emitEmptyBoundParameters);
1185   emitPostUpdateForReductionClause(
1186       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1187 }
1188 
1189 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1190                                       JumpDest LoopExit) {
1191   RunCleanupsScope BodyScope(*this);
1192   // Update counters values on current iteration.
1193   for (auto I : D.updates()) {
1194     EmitIgnoredExpr(I);
1195   }
1196   // Update the linear variables.
1197   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1198     for (auto *U : C->updates())
1199       EmitIgnoredExpr(U);
1200   }
1201 
1202   // On a continue in the body, jump to the end.
1203   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1204   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1205   // Emit loop body.
1206   EmitStmt(D.getBody());
1207   // The end (updates/cleanups).
1208   EmitBlock(Continue.getBlock());
1209   BreakContinueStack.pop_back();
1210 }
1211 
1212 void CodeGenFunction::EmitOMPInnerLoop(
1213     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
1214     const Expr *IncExpr,
1215     const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
1216     const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
1217   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1218 
1219   // Start the loop with a block that tests the condition.
1220   auto CondBlock = createBasicBlock("omp.inner.for.cond");
1221   EmitBlock(CondBlock);
1222   const SourceRange &R = S.getSourceRange();
1223   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1224                  SourceLocToDebugLoc(R.getEnd()));
1225 
1226   // If there are any cleanups between here and the loop-exit scope,
1227   // create a block to stage a loop exit along.
1228   auto ExitBlock = LoopExit.getBlock();
1229   if (RequiresCleanup)
1230     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1231 
1232   auto LoopBody = createBasicBlock("omp.inner.for.body");
1233 
1234   // Emit condition.
1235   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1236   if (ExitBlock != LoopExit.getBlock()) {
1237     EmitBlock(ExitBlock);
1238     EmitBranchThroughCleanup(LoopExit);
1239   }
1240 
1241   EmitBlock(LoopBody);
1242   incrementProfileCounter(&S);
1243 
1244   // Create a block for the increment.
1245   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1246   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1247 
1248   BodyGen(*this);
1249 
1250   // Emit "IV = IV + 1" and a back-edge to the condition block.
1251   EmitBlock(Continue.getBlock());
1252   EmitIgnoredExpr(IncExpr);
1253   PostIncGen(*this);
1254   BreakContinueStack.pop_back();
1255   EmitBranch(CondBlock);
1256   LoopStack.pop();
1257   // Emit the fall-through block.
1258   EmitBlock(LoopExit.getBlock());
1259 }
1260 
1261 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1262   if (!HaveInsertPoint())
1263     return false;
1264   // Emit inits for the linear variables.
1265   bool HasLinears = false;
1266   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1267     for (auto *Init : C->inits()) {
1268       HasLinears = true;
1269       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1270       if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1271         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1272         auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1273         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1274                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1275                         VD->getInit()->getType(), VK_LValue,
1276                         VD->getInit()->getExprLoc());
1277         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1278                                                 VD->getType()),
1279                        /*capturedByInit=*/false);
1280         EmitAutoVarCleanups(Emission);
1281       } else
1282         EmitVarDecl(*VD);
1283     }
1284     // Emit the linear steps for the linear clauses.
1285     // If a step is not constant, it is pre-calculated before the loop.
1286     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1287       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1288         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1289         // Emit calculation of the linear step.
1290         EmitIgnoredExpr(CS);
1291       }
1292   }
1293   return HasLinears;
1294 }
1295 
1296 void CodeGenFunction::EmitOMPLinearClauseFinal(
1297     const OMPLoopDirective &D,
1298     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1299   if (!HaveInsertPoint())
1300     return;
1301   llvm::BasicBlock *DoneBB = nullptr;
1302   // Emit the final values of the linear variables.
1303   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1304     auto IC = C->varlist_begin();
1305     for (auto *F : C->finals()) {
1306       if (!DoneBB) {
1307         if (auto *Cond = CondGen(*this)) {
1308           // If the first post-update expression is found, emit conditional
1309           // block if it was requested.
1310           auto *ThenBB = createBasicBlock(".omp.linear.pu");
1311           DoneBB = createBasicBlock(".omp.linear.pu.done");
1312           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1313           EmitBlock(ThenBB);
1314         }
1315       }
1316       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1317       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1318                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1319                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1320       Address OrigAddr = EmitLValue(&DRE).getAddress();
1321       CodeGenFunction::OMPPrivateScope VarScope(*this);
1322       VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
1323       (void)VarScope.Privatize();
1324       EmitIgnoredExpr(F);
1325       ++IC;
1326     }
1327     if (auto *PostUpdate = C->getPostUpdateExpr())
1328       EmitIgnoredExpr(PostUpdate);
1329   }
1330   if (DoneBB)
1331     EmitBlock(DoneBB, /*IsFinished=*/true);
1332 }
1333 
1334 static void emitAlignedClause(CodeGenFunction &CGF,
1335                               const OMPExecutableDirective &D) {
1336   if (!CGF.HaveInsertPoint())
1337     return;
1338   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1339     unsigned ClauseAlignment = 0;
1340     if (auto AlignmentExpr = Clause->getAlignment()) {
1341       auto AlignmentCI =
1342           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1343       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1344     }
1345     for (auto E : Clause->varlists()) {
1346       unsigned Alignment = ClauseAlignment;
1347       if (Alignment == 0) {
1348         // OpenMP [2.8.1, Description]
1349         // If no optional parameter is specified, implementation-defined default
1350         // alignments for SIMD instructions on the target platforms are assumed.
1351         Alignment =
1352             CGF.getContext()
1353                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1354                     E->getType()->getPointeeType()))
1355                 .getQuantity();
1356       }
1357       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1358              "alignment is not power of 2");
1359       if (Alignment != 0) {
1360         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1361         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1362       }
1363     }
1364   }
1365 }
1366 
1367 void CodeGenFunction::EmitOMPPrivateLoopCounters(
1368     const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
1369   if (!HaveInsertPoint())
1370     return;
1371   auto I = S.private_counters().begin();
1372   for (auto *E : S.counters()) {
1373     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1374     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1375     (void)LoopScope.addPrivate(VD, [&]() -> Address {
1376       // Emit var without initialization.
1377       if (!LocalDeclMap.count(PrivateVD)) {
1378         auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
1379         EmitAutoVarCleanups(VarEmission);
1380       }
1381       DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1382                       /*RefersToEnclosingVariableOrCapture=*/false,
1383                       (*I)->getType(), VK_LValue, (*I)->getExprLoc());
1384       return EmitLValue(&DRE).getAddress();
1385     });
1386     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
1387         VD->hasGlobalStorage()) {
1388       (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1389         DeclRefExpr DRE(const_cast<VarDecl *>(VD),
1390                         LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
1391                         E->getType(), VK_LValue, E->getExprLoc());
1392         return EmitLValue(&DRE).getAddress();
1393       });
1394     }
1395     ++I;
1396   }
1397 }
1398 
1399 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1400                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1401                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1402   if (!CGF.HaveInsertPoint())
1403     return;
1404   {
1405     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1406     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1407     (void)PreCondScope.Privatize();
1408     // Get initial values of real counters.
1409     for (auto I : S.inits()) {
1410       CGF.EmitIgnoredExpr(I);
1411     }
1412   }
1413   // Check that loop is executed at least one time.
1414   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1415 }
1416 
1417 void CodeGenFunction::EmitOMPLinearClause(
1418     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1419   if (!HaveInsertPoint())
1420     return;
1421   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1422   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1423     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1424     for (auto *C : LoopDirective->counters()) {
1425       SIMDLCVs.insert(
1426           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1427     }
1428   }
1429   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1430     auto CurPrivate = C->privates().begin();
1431     for (auto *E : C->varlists()) {
1432       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1433       auto *PrivateVD =
1434           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1435       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1436         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1437           // Emit private VarDecl with copy init.
1438           EmitVarDecl(*PrivateVD);
1439           return GetAddrOfLocalVar(PrivateVD);
1440         });
1441         assert(IsRegistered && "linear var already registered as private");
1442         // Silence the warning about unused variable.
1443         (void)IsRegistered;
1444       } else
1445         EmitVarDecl(*PrivateVD);
1446       ++CurPrivate;
1447     }
1448   }
1449 }
1450 
1451 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1452                                      const OMPExecutableDirective &D,
1453                                      bool IsMonotonic) {
1454   if (!CGF.HaveInsertPoint())
1455     return;
1456   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1457     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1458                                  /*ignoreResult=*/true);
1459     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1460     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1461     // In presence of finite 'safelen', it may be unsafe to mark all
1462     // the memory instructions parallel, because loop-carried
1463     // dependences of 'safelen' iterations are possible.
1464     if (!IsMonotonic)
1465       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1466   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1467     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1468                                  /*ignoreResult=*/true);
1469     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1470     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1471     // In presence of finite 'safelen', it may be unsafe to mark all
1472     // the memory instructions parallel, because loop-carried
1473     // dependences of 'safelen' iterations are possible.
1474     CGF.LoopStack.setParallel(false);
1475   }
1476 }
1477 
1478 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1479                                       bool IsMonotonic) {
1480   // Walk clauses and process safelen/lastprivate.
1481   LoopStack.setParallel(!IsMonotonic);
1482   LoopStack.setVectorizeEnable(true);
1483   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1484 }
1485 
1486 void CodeGenFunction::EmitOMPSimdFinal(
1487     const OMPLoopDirective &D,
1488     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1489   if (!HaveInsertPoint())
1490     return;
1491   llvm::BasicBlock *DoneBB = nullptr;
1492   auto IC = D.counters().begin();
1493   auto IPC = D.private_counters().begin();
1494   for (auto F : D.finals()) {
1495     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1496     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1497     auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1498     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1499         OrigVD->hasGlobalStorage() || CED) {
1500       if (!DoneBB) {
1501         if (auto *Cond = CondGen(*this)) {
1502           // If the first post-update expression is found, emit conditional
1503           // block if it was requested.
1504           auto *ThenBB = createBasicBlock(".omp.final.then");
1505           DoneBB = createBasicBlock(".omp.final.done");
1506           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1507           EmitBlock(ThenBB);
1508         }
1509       }
1510       Address OrigAddr = Address::invalid();
1511       if (CED)
1512         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1513       else {
1514         DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1515                         /*RefersToEnclosingVariableOrCapture=*/false,
1516                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1517         OrigAddr = EmitLValue(&DRE).getAddress();
1518       }
1519       OMPPrivateScope VarScope(*this);
1520       VarScope.addPrivate(OrigVD,
1521                           [OrigAddr]() -> Address { return OrigAddr; });
1522       (void)VarScope.Privatize();
1523       EmitIgnoredExpr(F);
1524     }
1525     ++IC;
1526     ++IPC;
1527   }
1528   if (DoneBB)
1529     EmitBlock(DoneBB, /*IsFinished=*/true);
1530 }
1531 
1532 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
1533                                          const OMPLoopDirective &S,
1534                                          CodeGenFunction::JumpDest LoopExit) {
1535   CGF.EmitOMPLoopBody(S, LoopExit);
1536   CGF.EmitStopPoint(&S);
1537 }
1538 
1539 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1540   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1541     OMPLoopScope PreInitScope(CGF, S);
1542     // if (PreCond) {
1543     //   for (IV in 0..LastIteration) BODY;
1544     //   <Final counter/linear vars updates>;
1545     // }
1546     //
1547 
1548     // Emit: if (PreCond) - begin.
1549     // If the condition constant folds and can be elided, avoid emitting the
1550     // whole loop.
1551     bool CondConstant;
1552     llvm::BasicBlock *ContBlock = nullptr;
1553     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1554       if (!CondConstant)
1555         return;
1556     } else {
1557       auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
1558       ContBlock = CGF.createBasicBlock("simd.if.end");
1559       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
1560                   CGF.getProfileCount(&S));
1561       CGF.EmitBlock(ThenBlock);
1562       CGF.incrementProfileCounter(&S);
1563     }
1564 
1565     // Emit the loop iteration variable.
1566     const Expr *IVExpr = S.getIterationVariable();
1567     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
1568     CGF.EmitVarDecl(*IVDecl);
1569     CGF.EmitIgnoredExpr(S.getInit());
1570 
1571     // Emit the iterations count variable.
1572     // If it is not a variable, Sema decided to calculate iterations count on
1573     // each iteration (e.g., it is foldable into a constant).
1574     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1575       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1576       // Emit calculation of the iterations count.
1577       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
1578     }
1579 
1580     CGF.EmitOMPSimdInit(S);
1581 
1582     emitAlignedClause(CGF, S);
1583     (void)CGF.EmitOMPLinearClauseInit(S);
1584     {
1585       OMPPrivateScope LoopScope(CGF);
1586       CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
1587       CGF.EmitOMPLinearClause(S, LoopScope);
1588       CGF.EmitOMPPrivateClause(S, LoopScope);
1589       CGF.EmitOMPReductionClauseInit(S, LoopScope);
1590       bool HasLastprivateClause =
1591           CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
1592       (void)LoopScope.Privatize();
1593       CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1594                            S.getInc(),
1595                            [&S](CodeGenFunction &CGF) {
1596                              CGF.EmitOMPLoopBody(S, JumpDest());
1597                              CGF.EmitStopPoint(&S);
1598                            },
1599                            [](CodeGenFunction &) {});
1600       CGF.EmitOMPSimdFinal(
1601           S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1602       // Emit final copy of the lastprivate variables at the end of loops.
1603       if (HasLastprivateClause)
1604         CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
1605       CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
1606       emitPostUpdateForReductionClause(
1607           CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1608     }
1609     CGF.EmitOMPLinearClauseFinal(
1610         S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1611     // Emit: if (PreCond) - end.
1612     if (ContBlock) {
1613       CGF.EmitBranch(ContBlock);
1614       CGF.EmitBlock(ContBlock, true);
1615     }
1616   };
1617   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1618   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1619 }
1620 
1621 void CodeGenFunction::EmitOMPOuterLoop(
1622     bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
1623     CodeGenFunction::OMPPrivateScope &LoopScope,
1624     const CodeGenFunction::OMPLoopArguments &LoopArgs,
1625     const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
1626     const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
1627   auto &RT = CGM.getOpenMPRuntime();
1628 
1629   const Expr *IVExpr = S.getIterationVariable();
1630   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1631   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1632 
1633   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
1634 
1635   // Start the loop with a block that tests the condition.
1636   auto CondBlock = createBasicBlock("omp.dispatch.cond");
1637   EmitBlock(CondBlock);
1638   const SourceRange &R = S.getSourceRange();
1639   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1640                  SourceLocToDebugLoc(R.getEnd()));
1641 
1642   llvm::Value *BoolCondVal = nullptr;
1643   if (!DynamicOrOrdered) {
1644     // UB = min(UB, GlobalUB) or
1645     // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
1646     // 'distribute parallel for')
1647     EmitIgnoredExpr(LoopArgs.EUB);
1648     // IV = LB
1649     EmitIgnoredExpr(LoopArgs.Init);
1650     // IV < UB
1651     BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
1652   } else {
1653     BoolCondVal =
1654         RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
1655                        LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
1656   }
1657 
1658   // If there are any cleanups between here and the loop-exit scope,
1659   // create a block to stage a loop exit along.
1660   auto ExitBlock = LoopExit.getBlock();
1661   if (LoopScope.requiresCleanups())
1662     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
1663 
1664   auto LoopBody = createBasicBlock("omp.dispatch.body");
1665   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
1666   if (ExitBlock != LoopExit.getBlock()) {
1667     EmitBlock(ExitBlock);
1668     EmitBranchThroughCleanup(LoopExit);
1669   }
1670   EmitBlock(LoopBody);
1671 
1672   // Emit "IV = LB" (in case of static schedule, we have already calculated new
1673   // LB for loop condition and emitted it above).
1674   if (DynamicOrOrdered)
1675     EmitIgnoredExpr(LoopArgs.Init);
1676 
1677   // Create a block for the increment.
1678   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
1679   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1680 
1681   // Generate !llvm.loop.parallel metadata for loads and stores for loops
1682   // with dynamic/guided scheduling and without ordered clause.
1683   if (!isOpenMPSimdDirective(S.getDirectiveKind()))
1684     LoopStack.setParallel(!IsMonotonic);
1685   else
1686     EmitOMPSimdInit(S, IsMonotonic);
1687 
1688   SourceLocation Loc = S.getLocStart();
1689 
1690   // when 'distribute' is not combined with a 'for':
1691   // while (idx <= UB) { BODY; ++idx; }
1692   // when 'distribute' is combined with a 'for'
1693   // (e.g. 'distribute parallel for')
1694   // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
1695   EmitOMPInnerLoop(
1696       S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
1697       [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
1698         CodeGenLoop(CGF, S, LoopExit);
1699       },
1700       [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
1701         CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
1702       });
1703 
1704   EmitBlock(Continue.getBlock());
1705   BreakContinueStack.pop_back();
1706   if (!DynamicOrOrdered) {
1707     // Emit "LB = LB + Stride", "UB = UB + Stride".
1708     EmitIgnoredExpr(LoopArgs.NextLB);
1709     EmitIgnoredExpr(LoopArgs.NextUB);
1710   }
1711 
1712   EmitBranch(CondBlock);
1713   LoopStack.pop();
1714   // Emit the fall-through block.
1715   EmitBlock(LoopExit.getBlock());
1716 
1717   // Tell the runtime we are done.
1718   auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
1719     if (!DynamicOrOrdered)
1720       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
1721                                                      S.getDirectiveKind());
1722   };
1723   OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
1724 }
1725 
1726 void CodeGenFunction::EmitOMPForOuterLoop(
1727     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1728     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1729     const OMPLoopArguments &LoopArgs,
1730     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1731   auto &RT = CGM.getOpenMPRuntime();
1732 
1733   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1734   const bool DynamicOrOrdered =
1735       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1736 
1737   assert((Ordered ||
1738           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1739                                  LoopArgs.Chunk != nullptr)) &&
1740          "static non-chunked schedule does not need outer loop");
1741 
1742   // Emit outer loop.
1743   //
1744   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1745   // When schedule(dynamic,chunk_size) is specified, the iterations are
1746   // distributed to threads in the team in chunks as the threads request them.
1747   // Each thread executes a chunk of iterations, then requests another chunk,
1748   // until no chunks remain to be distributed. Each chunk contains chunk_size
1749   // iterations, except for the last chunk to be distributed, which may have
1750   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1751   //
1752   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1753   // to threads in the team in chunks as the executing threads request them.
1754   // Each thread executes a chunk of iterations, then requests another chunk,
1755   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1756   // each chunk is proportional to the number of unassigned iterations divided
1757   // by the number of threads in the team, decreasing to 1. For a chunk_size
1758   // with value k (greater than 1), the size of each chunk is determined in the
1759   // same way, with the restriction that the chunks do not contain fewer than k
1760   // iterations (except for the last chunk to be assigned, which may have fewer
1761   // than k iterations).
1762   //
1763   // When schedule(auto) is specified, the decision regarding scheduling is
1764   // delegated to the compiler and/or runtime system. The programmer gives the
1765   // implementation the freedom to choose any possible mapping of iterations to
1766   // threads in the team.
1767   //
1768   // When schedule(runtime) is specified, the decision regarding scheduling is
1769   // deferred until run time, and the schedule and chunk size are taken from the
1770   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1771   // implementation defined
1772   //
1773   // while(__kmpc_dispatch_next(&LB, &UB)) {
1774   //   idx = LB;
1775   //   while (idx <= UB) { BODY; ++idx;
1776   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1777   //   } // inner loop
1778   // }
1779   //
1780   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1781   // When schedule(static, chunk_size) is specified, iterations are divided into
1782   // chunks of size chunk_size, and the chunks are assigned to the threads in
1783   // the team in a round-robin fashion in the order of the thread number.
1784   //
1785   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1786   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1787   //   LB = LB + ST;
1788   //   UB = UB + ST;
1789   // }
1790   //
1791 
1792   const Expr *IVExpr = S.getIterationVariable();
1793   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1794   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1795 
1796   if (DynamicOrOrdered) {
1797     auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1798     llvm::Value *LBVal = DispatchBounds.first;
1799     llvm::Value *UBVal = DispatchBounds.second;
1800     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
1801                                                              LoopArgs.Chunk};
1802     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1803                            IVSigned, Ordered, DipatchRTInputValues);
1804   } else {
1805     CGOpenMPRuntime::StaticRTInput StaticInit(
1806         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1807         LoopArgs.ST, LoopArgs.Chunk);
1808     RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
1809                          ScheduleKind, StaticInit);
1810   }
1811 
1812   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1813                                     const unsigned IVSize,
1814                                     const bool IVSigned) {
1815     if (Ordered) {
1816       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1817                                                             IVSigned);
1818     }
1819   };
1820 
1821   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1822                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1823   OuterLoopArgs.IncExpr = S.getInc();
1824   OuterLoopArgs.Init = S.getInit();
1825   OuterLoopArgs.Cond = S.getCond();
1826   OuterLoopArgs.NextLB = S.getNextLowerBound();
1827   OuterLoopArgs.NextUB = S.getNextUpperBound();
1828   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1829                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1830 }
1831 
1832 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
1833                              const unsigned IVSize, const bool IVSigned) {}
1834 
1835 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1836     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1837     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1838     const CodeGenLoopTy &CodeGenLoopContent) {
1839 
1840   auto &RT = CGM.getOpenMPRuntime();
1841 
1842   // Emit outer loop.
1843   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1844   // dynamic
1845   //
1846 
1847   const Expr *IVExpr = S.getIterationVariable();
1848   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1849   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1850 
1851   CGOpenMPRuntime::StaticRTInput StaticInit(
1852       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
1853       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
1854   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
1855 
1856   // for combined 'distribute' and 'for' the increment expression of distribute
1857   // is store in DistInc. For 'distribute' alone, it is in Inc.
1858   Expr *IncExpr;
1859   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1860     IncExpr = S.getDistInc();
1861   else
1862     IncExpr = S.getInc();
1863 
1864   // this routine is shared by 'omp distribute parallel for' and
1865   // 'omp distribute': select the right EUB expression depending on the
1866   // directive
1867   OMPLoopArguments OuterLoopArgs;
1868   OuterLoopArgs.LB = LoopArgs.LB;
1869   OuterLoopArgs.UB = LoopArgs.UB;
1870   OuterLoopArgs.ST = LoopArgs.ST;
1871   OuterLoopArgs.IL = LoopArgs.IL;
1872   OuterLoopArgs.Chunk = LoopArgs.Chunk;
1873   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1874                           ? S.getCombinedEnsureUpperBound()
1875                           : S.getEnsureUpperBound();
1876   OuterLoopArgs.IncExpr = IncExpr;
1877   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1878                            ? S.getCombinedInit()
1879                            : S.getInit();
1880   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1881                            ? S.getCombinedCond()
1882                            : S.getCond();
1883   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1884                              ? S.getCombinedNextLowerBound()
1885                              : S.getNextLowerBound();
1886   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1887                              ? S.getCombinedNextUpperBound()
1888                              : S.getNextUpperBound();
1889 
1890   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
1891                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
1892                    emitEmptyOrdered);
1893 }
1894 
1895 /// Emit a helper variable and return corresponding lvalue.
1896 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1897                                const DeclRefExpr *Helper) {
1898   auto VDecl = cast<VarDecl>(Helper->getDecl());
1899   CGF.EmitVarDecl(*VDecl);
1900   return CGF.EmitLValue(Helper);
1901 }
1902 
1903 static std::pair<LValue, LValue>
1904 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
1905                                      const OMPExecutableDirective &S) {
1906   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1907   LValue LB =
1908       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
1909   LValue UB =
1910       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
1911 
1912   // When composing 'distribute' with 'for' (e.g. as in 'distribute
1913   // parallel for') we need to use the 'distribute'
1914   // chunk lower and upper bounds rather than the whole loop iteration
1915   // space. These are parameters to the outlined function for 'parallel'
1916   // and we copy the bounds of the previous schedule into the
1917   // the current ones.
1918   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
1919   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
1920   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
1921   PrevLBVal = CGF.EmitScalarConversion(
1922       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
1923       LS.getIterationVariable()->getType(), SourceLocation());
1924   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
1925   PrevUBVal = CGF.EmitScalarConversion(
1926       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
1927       LS.getIterationVariable()->getType(), SourceLocation());
1928 
1929   CGF.EmitStoreOfScalar(PrevLBVal, LB);
1930   CGF.EmitStoreOfScalar(PrevUBVal, UB);
1931 
1932   return {LB, UB};
1933 }
1934 
1935 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
1936 /// we need to use the LB and UB expressions generated by the worksharing
1937 /// code generation support, whereas in non combined situations we would
1938 /// just emit 0 and the LastIteration expression
1939 /// This function is necessary due to the difference of the LB and UB
1940 /// types for the RT emission routines for 'for_static_init' and
1941 /// 'for_dispatch_init'
1942 static std::pair<llvm::Value *, llvm::Value *>
1943 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
1944                                         const OMPExecutableDirective &S,
1945                                         Address LB, Address UB) {
1946   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1947   const Expr *IVExpr = LS.getIterationVariable();
1948   // when implementing a dynamic schedule for a 'for' combined with a
1949   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
1950   // is not normalized as each team only executes its own assigned
1951   // distribute chunk
1952   QualType IteratorTy = IVExpr->getType();
1953   llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
1954                                             SourceLocation());
1955   llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
1956                                             SourceLocation());
1957   return {LBVal, UBVal};
1958 }
1959 
1960 static void emitDistributeParallelForDistributeInnerBoundParams(
1961     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1962     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
1963   const auto &Dir = cast<OMPLoopDirective>(S);
1964   LValue LB =
1965       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
1966   auto LBCast = CGF.Builder.CreateIntCast(
1967       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1968   CapturedVars.push_back(LBCast);
1969   LValue UB =
1970       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
1971 
1972   auto UBCast = CGF.Builder.CreateIntCast(
1973       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1974   CapturedVars.push_back(UBCast);
1975 }
1976 
1977 static void
1978 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
1979                                  const OMPLoopDirective &S,
1980                                  CodeGenFunction::JumpDest LoopExit) {
1981   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
1982                                          PrePostActionTy &) {
1983     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
1984                                emitDistributeParallelForInnerBounds,
1985                                emitDistributeParallelForDispatchBounds);
1986   };
1987 
1988   emitCommonOMPParallelDirective(
1989       CGF, S, OMPD_for, CGInlinedWorksharingLoop,
1990       emitDistributeParallelForDistributeInnerBoundParams);
1991 }
1992 
1993 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
1994     const OMPDistributeParallelForDirective &S) {
1995   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1996     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
1997                               S.getDistInc());
1998   };
1999   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2000   OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for,
2001                                   /*HasCancel=*/false);
2002   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
2003                                               /*HasCancel=*/false);
2004 }
2005 
2006 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2007     const OMPDistributeParallelForSimdDirective &S) {
2008   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2009   CGM.getOpenMPRuntime().emitInlinedDirective(
2010       *this, OMPD_distribute_parallel_for_simd,
2011       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2012         OMPLoopScope PreInitScope(CGF, S);
2013         CGF.EmitStmt(
2014             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2015       });
2016 }
2017 
2018 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2019     const OMPDistributeSimdDirective &S) {
2020   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2021   CGM.getOpenMPRuntime().emitInlinedDirective(
2022       *this, OMPD_distribute_simd,
2023       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2024         OMPLoopScope PreInitScope(CGF, S);
2025         CGF.EmitStmt(
2026             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2027       });
2028 }
2029 
2030 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
2031     const OMPTargetParallelForSimdDirective &S) {
2032   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2033   CGM.getOpenMPRuntime().emitInlinedDirective(
2034       *this, OMPD_target_parallel_for_simd,
2035       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2036         OMPLoopScope PreInitScope(CGF, S);
2037         CGF.EmitStmt(
2038             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2039       });
2040 }
2041 
2042 void CodeGenFunction::EmitOMPTargetSimdDirective(
2043     const OMPTargetSimdDirective &S) {
2044   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2045   CGM.getOpenMPRuntime().emitInlinedDirective(
2046       *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2047         OMPLoopScope PreInitScope(CGF, S);
2048         CGF.EmitStmt(
2049             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2050       });
2051 }
2052 
2053 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
2054     const OMPTeamsDistributeSimdDirective &S) {
2055   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2056   CGM.getOpenMPRuntime().emitInlinedDirective(
2057       *this, OMPD_teams_distribute_simd,
2058       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2059         OMPLoopScope PreInitScope(CGF, S);
2060         CGF.EmitStmt(
2061             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2062       });
2063 }
2064 
2065 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
2066     const OMPTeamsDistributeParallelForSimdDirective &S) {
2067   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2068   CGM.getOpenMPRuntime().emitInlinedDirective(
2069       *this, OMPD_teams_distribute_parallel_for_simd,
2070       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2071         OMPLoopScope PreInitScope(CGF, S);
2072         CGF.EmitStmt(
2073             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2074       });
2075 }
2076 
2077 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
2078     const OMPTeamsDistributeParallelForDirective &S) {
2079   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2080   CGM.getOpenMPRuntime().emitInlinedDirective(
2081       *this, OMPD_teams_distribute_parallel_for,
2082       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2083         OMPLoopScope PreInitScope(CGF, S);
2084         CGF.EmitStmt(
2085             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2086       });
2087 }
2088 
2089 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
2090     const OMPTargetTeamsDistributeDirective &S) {
2091   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2092   CGM.getOpenMPRuntime().emitInlinedDirective(
2093       *this, OMPD_target_teams_distribute,
2094       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2095         CGF.EmitStmt(
2096             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2097       });
2098 }
2099 
2100 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
2101     const OMPTargetTeamsDistributeParallelForDirective &S) {
2102   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2103   CGM.getOpenMPRuntime().emitInlinedDirective(
2104       *this, OMPD_target_teams_distribute_parallel_for,
2105       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2106         CGF.EmitStmt(
2107             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2108       });
2109 }
2110 
2111 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
2112     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
2113   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2114   CGM.getOpenMPRuntime().emitInlinedDirective(
2115       *this, OMPD_target_teams_distribute_parallel_for_simd,
2116       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2117         CGF.EmitStmt(
2118             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2119       });
2120 }
2121 
2122 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
2123     const OMPTargetTeamsDistributeSimdDirective &S) {
2124   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2125   CGM.getOpenMPRuntime().emitInlinedDirective(
2126       *this, OMPD_target_teams_distribute_simd,
2127       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2128         CGF.EmitStmt(
2129             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2130       });
2131 }
2132 
2133 namespace {
2134   struct ScheduleKindModifiersTy {
2135     OpenMPScheduleClauseKind Kind;
2136     OpenMPScheduleClauseModifier M1;
2137     OpenMPScheduleClauseModifier M2;
2138     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2139                             OpenMPScheduleClauseModifier M1,
2140                             OpenMPScheduleClauseModifier M2)
2141         : Kind(Kind), M1(M1), M2(M2) {}
2142   };
2143 } // namespace
2144 
2145 bool CodeGenFunction::EmitOMPWorksharingLoop(
2146     const OMPLoopDirective &S, Expr *EUB,
2147     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2148     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2149   // Emit the loop iteration variable.
2150   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2151   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
2152   EmitVarDecl(*IVDecl);
2153 
2154   // Emit the iterations count variable.
2155   // If it is not a variable, Sema decided to calculate iterations count on each
2156   // iteration (e.g., it is foldable into a constant).
2157   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2158     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2159     // Emit calculation of the iterations count.
2160     EmitIgnoredExpr(S.getCalcLastIteration());
2161   }
2162 
2163   auto &RT = CGM.getOpenMPRuntime();
2164 
2165   bool HasLastprivateClause;
2166   // Check pre-condition.
2167   {
2168     OMPLoopScope PreInitScope(*this, S);
2169     // Skip the entire loop if we don't meet the precondition.
2170     // If the condition constant folds and can be elided, avoid emitting the
2171     // whole loop.
2172     bool CondConstant;
2173     llvm::BasicBlock *ContBlock = nullptr;
2174     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2175       if (!CondConstant)
2176         return false;
2177     } else {
2178       auto *ThenBlock = createBasicBlock("omp.precond.then");
2179       ContBlock = createBasicBlock("omp.precond.end");
2180       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2181                   getProfileCount(&S));
2182       EmitBlock(ThenBlock);
2183       incrementProfileCounter(&S);
2184     }
2185 
2186     bool Ordered = false;
2187     if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2188       if (OrderedClause->getNumForLoops())
2189         RT.emitDoacrossInit(*this, S);
2190       else
2191         Ordered = true;
2192     }
2193 
2194     llvm::DenseSet<const Expr *> EmittedFinals;
2195     emitAlignedClause(*this, S);
2196     bool HasLinears = EmitOMPLinearClauseInit(S);
2197     // Emit helper vars inits.
2198 
2199     std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
2200     LValue LB = Bounds.first;
2201     LValue UB = Bounds.second;
2202     LValue ST =
2203         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2204     LValue IL =
2205         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2206 
2207     // Emit 'then' code.
2208     {
2209       OMPPrivateScope LoopScope(*this);
2210       if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
2211         // Emit implicit barrier to synchronize threads and avoid data races on
2212         // initialization of firstprivate variables and post-update of
2213         // lastprivate variables.
2214         CGM.getOpenMPRuntime().emitBarrierCall(
2215             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2216             /*ForceSimpleCall=*/true);
2217       }
2218       EmitOMPPrivateClause(S, LoopScope);
2219       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2220       EmitOMPReductionClauseInit(S, LoopScope);
2221       EmitOMPPrivateLoopCounters(S, LoopScope);
2222       EmitOMPLinearClause(S, LoopScope);
2223       (void)LoopScope.Privatize();
2224 
2225       // Detect the loop schedule kind and chunk.
2226       llvm::Value *Chunk = nullptr;
2227       OpenMPScheduleTy ScheduleKind;
2228       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
2229         ScheduleKind.Schedule = C->getScheduleKind();
2230         ScheduleKind.M1 = C->getFirstScheduleModifier();
2231         ScheduleKind.M2 = C->getSecondScheduleModifier();
2232         if (const auto *Ch = C->getChunkSize()) {
2233           Chunk = EmitScalarExpr(Ch);
2234           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
2235                                        S.getIterationVariable()->getType(),
2236                                        S.getLocStart());
2237         }
2238       }
2239       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2240       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2241       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2242       // If the static schedule kind is specified or if the ordered clause is
2243       // specified, and if no monotonic modifier is specified, the effect will
2244       // be as if the monotonic modifier was specified.
2245       if (RT.isStaticNonchunked(ScheduleKind.Schedule,
2246                                 /* Chunked */ Chunk != nullptr) &&
2247           !Ordered) {
2248         if (isOpenMPSimdDirective(S.getDirectiveKind()))
2249           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
2250         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2251         // When no chunk_size is specified, the iteration space is divided into
2252         // chunks that are approximately equal in size, and at most one chunk is
2253         // distributed to each thread. Note that the size of the chunks is
2254         // unspecified in this case.
2255         CGOpenMPRuntime::StaticRTInput StaticInit(
2256             IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
2257             UB.getAddress(), ST.getAddress());
2258         RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
2259                              ScheduleKind, StaticInit);
2260         auto LoopExit =
2261             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2262         // UB = min(UB, GlobalUB);
2263         EmitIgnoredExpr(S.getEnsureUpperBound());
2264         // IV = LB;
2265         EmitIgnoredExpr(S.getInit());
2266         // while (idx <= UB) { BODY; ++idx; }
2267         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
2268                          S.getInc(),
2269                          [&S, LoopExit](CodeGenFunction &CGF) {
2270                            CGF.EmitOMPLoopBody(S, LoopExit);
2271                            CGF.EmitStopPoint(&S);
2272                          },
2273                          [](CodeGenFunction &) {});
2274         EmitBlock(LoopExit.getBlock());
2275         // Tell the runtime we are done.
2276         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2277           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
2278                                                          S.getDirectiveKind());
2279         };
2280         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2281       } else {
2282         const bool IsMonotonic =
2283             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2284             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2285             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2286             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2287         // Emit the outer loop, which requests its work chunk [LB..UB] from
2288         // runtime and runs the inner loop to process it.
2289         const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
2290                                              ST.getAddress(), IL.getAddress(),
2291                                              Chunk, EUB);
2292         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2293                             LoopArguments, CGDispatchBounds);
2294       }
2295       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2296         EmitOMPSimdFinal(S,
2297                          [&](CodeGenFunction &CGF) -> llvm::Value * {
2298                            return CGF.Builder.CreateIsNotNull(
2299                                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2300                          });
2301       }
2302       EmitOMPReductionClauseFinal(
2303           S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
2304                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
2305                  : /*Parallel only*/ OMPD_parallel);
2306       // Emit post-update of the reduction variables if IsLastIter != 0.
2307       emitPostUpdateForReductionClause(
2308           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2309             return CGF.Builder.CreateIsNotNull(
2310                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2311           });
2312       // Emit final copy of the lastprivate variables if IsLastIter != 0.
2313       if (HasLastprivateClause)
2314         EmitOMPLastprivateClauseFinal(
2315             S, isOpenMPSimdDirective(S.getDirectiveKind()),
2316             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
2317     }
2318     EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2319       return CGF.Builder.CreateIsNotNull(
2320           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2321     });
2322     // We're now done with the loop, so jump to the continuation block.
2323     if (ContBlock) {
2324       EmitBranch(ContBlock);
2325       EmitBlock(ContBlock, true);
2326     }
2327   }
2328   return HasLastprivateClause;
2329 }
2330 
2331 /// The following two functions generate expressions for the loop lower
2332 /// and upper bounds in case of static and dynamic (dispatch) schedule
2333 /// of the associated 'for' or 'distribute' loop.
2334 static std::pair<LValue, LValue>
2335 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2336   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2337   LValue LB =
2338       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2339   LValue UB =
2340       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2341   return {LB, UB};
2342 }
2343 
2344 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2345 /// consider the lower and upper bound expressions generated by the
2346 /// worksharing loop support, but we use 0 and the iteration space size as
2347 /// constants
2348 static std::pair<llvm::Value *, llvm::Value *>
2349 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2350                           Address LB, Address UB) {
2351   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2352   const Expr *IVExpr = LS.getIterationVariable();
2353   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2354   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2355   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2356   return {LBVal, UBVal};
2357 }
2358 
2359 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2360   bool HasLastprivates = false;
2361   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2362                                           PrePostActionTy &) {
2363     OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2364     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2365                                                  emitForLoopBounds,
2366                                                  emitDispatchForLoopBounds);
2367   };
2368   {
2369     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2370     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2371                                                 S.hasCancel());
2372   }
2373 
2374   // Emit an implicit barrier at the end.
2375   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2376     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2377   }
2378 }
2379 
2380 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2381   bool HasLastprivates = false;
2382   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2383                                           PrePostActionTy &) {
2384     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2385                                                  emitForLoopBounds,
2386                                                  emitDispatchForLoopBounds);
2387   };
2388   {
2389     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2390     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2391   }
2392 
2393   // Emit an implicit barrier at the end.
2394   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2395     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2396   }
2397 }
2398 
2399 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2400                                 const Twine &Name,
2401                                 llvm::Value *Init = nullptr) {
2402   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2403   if (Init)
2404     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2405   return LVal;
2406 }
2407 
/// Emit the code shared by sections-based directives.
///
/// The statement captured by \p S is lowered to a statically scheduled loop
/// over 32-bit section indices whose body is a switch on the loop counter.
/// When the captured statement is a compound statement each of its children
/// becomes one case; otherwise the whole statement is the single case 0.
/// Cancellation support is taken from \p S when it is an OMPSectionsDirective
/// or an OMPParallelSectionsDirective.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  // Null when the captured statement is not a compound statement.
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Global upper bound: index of the last section (0 for a single,
    // non-compound statement).
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Opaque expressions mapped to IV and UB so that the condition and
    // increment below can be expressed as AST nodes.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      // The exit block doubles as the default destination of the switch.
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        // One case per child of the compound statement, numbered from 0.
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // The captured statement is not a compound statement: emit it as the
        // only case.
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  // Only 'sections' and 'parallel sections' directives are queried for
  // cancellation; any other directive kind is treated as not cancellable.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // With 'nowait' no end-of-directive barrier is emitted by the caller, so
    // emit one here to synchronize threads after the lastprivate updates.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}
2547 
2548 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2549   {
2550     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2551     EmitSections(S);
2552   }
2553   // Emit an implicit barrier at the end.
2554   if (!S.getSingleClause<OMPNowaitClause>()) {
2555     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2556                                            OMPD_sections);
2557   }
2558 }
2559 
2560 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2561   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2562     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2563   };
2564   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2565   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2566                                               S.hasCancel());
2567 }
2568 
2569 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2570   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2571   llvm::SmallVector<const Expr *, 8> DestExprs;
2572   llvm::SmallVector<const Expr *, 8> SrcExprs;
2573   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2574   // Check if there are any 'copyprivate' clauses associated with this
2575   // 'single' construct.
2576   // Build a list of copyprivate variables along with helper expressions
2577   // (<source>, <destination>, <destination>=<source> expressions)
2578   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2579     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2580     DestExprs.append(C->destination_exprs().begin(),
2581                      C->destination_exprs().end());
2582     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2583     AssignmentOps.append(C->assignment_ops().begin(),
2584                          C->assignment_ops().end());
2585   }
2586   // Emit code for 'single' region along with 'copyprivate' clauses
2587   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2588     Action.Enter(CGF);
2589     OMPPrivateScope SingleScope(CGF);
2590     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2591     CGF.EmitOMPPrivateClause(S, SingleScope);
2592     (void)SingleScope.Privatize();
2593     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2594   };
2595   {
2596     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2597     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2598                                             CopyprivateVars, DestExprs,
2599                                             SrcExprs, AssignmentOps);
2600   }
2601   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2602   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2603   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2604     CGM.getOpenMPRuntime().emitBarrierCall(
2605         *this, S.getLocStart(),
2606         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2607   }
2608 }
2609 
2610 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2611   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2612     Action.Enter(CGF);
2613     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2614   };
2615   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2616   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2617 }
2618 
2619 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2620   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2621     Action.Enter(CGF);
2622     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2623   };
2624   Expr *Hint = nullptr;
2625   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2626     Hint = HintClause->getHint();
2627   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2628   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2629                                             S.getDirectiveName().getAsString(),
2630                                             CodeGen, S.getLocStart(), Hint);
2631 }
2632 
2633 void CodeGenFunction::EmitOMPParallelForDirective(
2634     const OMPParallelForDirective &S) {
2635   // Emit directive as a combined directive that consists of two implicit
2636   // directives: 'parallel' with 'for' directive.
2637   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2638     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2639     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2640                                emitDispatchForLoopBounds);
2641   };
2642   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2643                                  emitEmptyBoundParameters);
2644 }
2645 
2646 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2647     const OMPParallelForSimdDirective &S) {
2648   // Emit directive as a combined directive that consists of two implicit
2649   // directives: 'parallel' with 'for' directive.
2650   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2651     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2652                                emitDispatchForLoopBounds);
2653   };
2654   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2655                                  emitEmptyBoundParameters);
2656 }
2657 
2658 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2659     const OMPParallelSectionsDirective &S) {
2660   // Emit directive as a combined directive that consists of two implicit
2661   // directives: 'parallel' with 'sections' directive.
2662   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2663     CGF.EmitSections(S);
2664   };
2665   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2666                                  emitEmptyBoundParameters);
2667 }
2668 
2669 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
2670                                                 const RegionCodeGenTy &BodyGen,
2671                                                 const TaskGenTy &TaskGen,
2672                                                 OMPTaskDataTy &Data) {
2673   // Emit outlined function for task construct.
2674   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2675   auto *I = CS->getCapturedDecl()->param_begin();
2676   auto *PartId = std::next(I);
2677   auto *TaskT = std::next(I, 4);
2678   // Check if the task is final
2679   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2680     // If the condition constant folds and can be elided, try to avoid emitting
2681     // the condition and the dead arm of the if/else.
2682     auto *Cond = Clause->getCondition();
2683     bool CondConstant;
2684     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2685       Data.Final.setInt(CondConstant);
2686     else
2687       Data.Final.setPointer(EvaluateExprAsBool(Cond));
2688   } else {
2689     // By default the task is not final.
2690     Data.Final.setInt(/*IntVal=*/false);
2691   }
2692   // Check if the task has 'priority' clause.
2693   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2694     auto *Prio = Clause->getPriority();
2695     Data.Priority.setInt(/*IntVal=*/true);
2696     Data.Priority.setPointer(EmitScalarConversion(
2697         EmitScalarExpr(Prio), Prio->getType(),
2698         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2699         Prio->getExprLoc()));
2700   }
2701   // The first function argument for tasks is a thread id, the second one is a
2702   // part id (0 for tied tasks, >=0 for untied task).
2703   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2704   // Get list of private variables.
2705   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2706     auto IRef = C->varlist_begin();
2707     for (auto *IInit : C->private_copies()) {
2708       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2709       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2710         Data.PrivateVars.push_back(*IRef);
2711         Data.PrivateCopies.push_back(IInit);
2712       }
2713       ++IRef;
2714     }
2715   }
2716   EmittedAsPrivate.clear();
2717   // Get list of firstprivate variables.
2718   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2719     auto IRef = C->varlist_begin();
2720     auto IElemInitRef = C->inits().begin();
2721     for (auto *IInit : C->private_copies()) {
2722       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2723       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2724         Data.FirstprivateVars.push_back(*IRef);
2725         Data.FirstprivateCopies.push_back(IInit);
2726         Data.FirstprivateInits.push_back(*IElemInitRef);
2727       }
2728       ++IRef;
2729       ++IElemInitRef;
2730     }
2731   }
2732   // Get list of lastprivate variables (for taskloops).
2733   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2734   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2735     auto IRef = C->varlist_begin();
2736     auto ID = C->destination_exprs().begin();
2737     for (auto *IInit : C->private_copies()) {
2738       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2739       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2740         Data.LastprivateVars.push_back(*IRef);
2741         Data.LastprivateCopies.push_back(IInit);
2742       }
2743       LastprivateDstsOrigs.insert(
2744           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2745            cast<DeclRefExpr>(*IRef)});
2746       ++IRef;
2747       ++ID;
2748     }
2749   }
2750   SmallVector<const Expr *, 4> LHSs;
2751   SmallVector<const Expr *, 4> RHSs;
2752   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
2753     auto IPriv = C->privates().begin();
2754     auto IRed = C->reduction_ops().begin();
2755     auto ILHS = C->lhs_exprs().begin();
2756     auto IRHS = C->rhs_exprs().begin();
2757     for (const auto *Ref : C->varlists()) {
2758       Data.ReductionVars.emplace_back(Ref);
2759       Data.ReductionCopies.emplace_back(*IPriv);
2760       Data.ReductionOps.emplace_back(*IRed);
2761       LHSs.emplace_back(*ILHS);
2762       RHSs.emplace_back(*IRHS);
2763       std::advance(IPriv, 1);
2764       std::advance(IRed, 1);
2765       std::advance(ILHS, 1);
2766       std::advance(IRHS, 1);
2767     }
2768   }
2769   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
2770       *this, S.getLocStart(), LHSs, RHSs, Data);
2771   // Build list of dependences.
2772   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2773     for (auto *IRef : C->varlists())
2774       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
2775   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
2776       CodeGenFunction &CGF, PrePostActionTy &Action) {
2777     // Set proper addresses for generated private copies.
2778     OMPPrivateScope Scope(CGF);
2779     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
2780         !Data.LastprivateVars.empty()) {
2781       enum { PrivatesParam = 2, CopyFnParam = 3 };
2782       auto *CopyFn = CGF.Builder.CreateLoad(
2783           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
2784       auto *PrivatesPtr = CGF.Builder.CreateLoad(
2785           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
2786       // Map privates.
2787       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2788       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2789       CallArgs.push_back(PrivatesPtr);
2790       for (auto *E : Data.PrivateVars) {
2791         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2792         Address PrivatePtr = CGF.CreateMemTemp(
2793             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2794         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2795         CallArgs.push_back(PrivatePtr.getPointer());
2796       }
2797       for (auto *E : Data.FirstprivateVars) {
2798         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2799         Address PrivatePtr =
2800             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2801                               ".firstpriv.ptr.addr");
2802         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2803         CallArgs.push_back(PrivatePtr.getPointer());
2804       }
2805       for (auto *E : Data.LastprivateVars) {
2806         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2807         Address PrivatePtr =
2808             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2809                               ".lastpriv.ptr.addr");
2810         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2811         CallArgs.push_back(PrivatePtr.getPointer());
2812       }
2813       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
2814                                                           CopyFn, CallArgs);
2815       for (auto &&Pair : LastprivateDstsOrigs) {
2816         auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2817         DeclRefExpr DRE(
2818             const_cast<VarDecl *>(OrigVD),
2819             /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2820                 OrigVD) != nullptr,
2821             Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2822         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2823           return CGF.EmitLValue(&DRE).getAddress();
2824         });
2825       }
2826       for (auto &&Pair : PrivatePtrs) {
2827         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2828                             CGF.getContext().getDeclAlign(Pair.first));
2829         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2830       }
2831     }
2832     if (Data.Reductions) {
2833       OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
2834       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2835                              Data.ReductionOps);
2836       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2837           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2838       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2839         RedCG.emitSharedLValue(CGF, Cnt);
2840         RedCG.emitAggregateType(CGF, Cnt);
2841         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2842             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2843         Replacement =
2844             Address(CGF.EmitScalarConversion(
2845                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2846                         CGF.getContext().getPointerType(
2847                             Data.ReductionCopies[Cnt]->getType()),
2848                         SourceLocation()),
2849                     Replacement.getAlignment());
2850         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2851         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2852                          [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
2856         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2857                                                            RedCG, Cnt);
2858       }
2859     }
2860     // Privatize all private variables except for in_reduction items.
2861     (void)Scope.Privatize();
2862     SmallVector<const Expr *, 4> InRedVars;
2863     SmallVector<const Expr *, 4> InRedPrivs;
2864     SmallVector<const Expr *, 4> InRedOps;
2865     SmallVector<const Expr *, 4> TaskgroupDescriptors;
2866     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
2867       auto IPriv = C->privates().begin();
2868       auto IRed = C->reduction_ops().begin();
2869       auto ITD = C->taskgroup_descriptors().begin();
2870       for (const auto *Ref : C->varlists()) {
2871         InRedVars.emplace_back(Ref);
2872         InRedPrivs.emplace_back(*IPriv);
2873         InRedOps.emplace_back(*IRed);
2874         TaskgroupDescriptors.emplace_back(*ITD);
2875         std::advance(IPriv, 1);
2876         std::advance(IRed, 1);
2877         std::advance(ITD, 1);
2878       }
2879     }
2880     // Privatize in_reduction items here, because taskgroup descriptors must be
2881     // privatized earlier.
2882     OMPPrivateScope InRedScope(CGF);
2883     if (!InRedVars.empty()) {
2884       ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
2885       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
2886         RedCG.emitSharedLValue(CGF, Cnt);
2887         RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate and
        // privatized already during processing of the firstprivates.
2890         llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
2891             CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
2892         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2893             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2894         Replacement = Address(
2895             CGF.EmitScalarConversion(
2896                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2897                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
2898                 SourceLocation()),
2899             Replacement.getAlignment());
2900         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2901         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
2902                               [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
2906         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2907                                                            RedCG, Cnt);
2908       }
2909     }
2910     (void)InRedScope.Privatize();
2911 
2912     Action.Enter(CGF);
2913     BodyGen(CGF);
2914   };
2915   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2916       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
2917       Data.NumberOfParts);
2918   OMPLexicalScope Scope(*this, S);
2919   TaskGen(*this, OutlinedFn, Data);
2920 }
2921 
2922 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
2923   // Emit outlined function for task construct.
2924   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2925   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
2926   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
2927   const Expr *IfCond = nullptr;
2928   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2929     if (C->getNameModifier() == OMPD_unknown ||
2930         C->getNameModifier() == OMPD_task) {
2931       IfCond = C->getCondition();
2932       break;
2933     }
2934   }
2935 
2936   OMPTaskDataTy Data;
2937   // Check if we should emit tied or untied task.
2938   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
2939   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
2940     CGF.EmitStmt(CS->getCapturedStmt());
2941   };
2942   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
2943                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
2944                             const OMPTaskDataTy &Data) {
2945     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
2946                                             SharedsTy, CapturedStruct, IfCond,
2947                                             Data);
2948   };
2949   EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
2950 }
2951 
2952 void CodeGenFunction::EmitOMPTaskyieldDirective(
2953     const OMPTaskyieldDirective &S) {
2954   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2955 }
2956 
2957 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2958   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2959 }
2960 
2961 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2962   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2963 }
2964 
2965 void CodeGenFunction::EmitOMPTaskgroupDirective(
2966     const OMPTaskgroupDirective &S) {
2967   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2968     Action.Enter(CGF);
2969     if (const Expr *E = S.getReductionRef()) {
2970       SmallVector<const Expr *, 4> LHSs;
2971       SmallVector<const Expr *, 4> RHSs;
2972       OMPTaskDataTy Data;
2973       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
2974         auto IPriv = C->privates().begin();
2975         auto IRed = C->reduction_ops().begin();
2976         auto ILHS = C->lhs_exprs().begin();
2977         auto IRHS = C->rhs_exprs().begin();
2978         for (const auto *Ref : C->varlists()) {
2979           Data.ReductionVars.emplace_back(Ref);
2980           Data.ReductionCopies.emplace_back(*IPriv);
2981           Data.ReductionOps.emplace_back(*IRed);
2982           LHSs.emplace_back(*ILHS);
2983           RHSs.emplace_back(*IRHS);
2984           std::advance(IPriv, 1);
2985           std::advance(IRed, 1);
2986           std::advance(ILHS, 1);
2987           std::advance(IRHS, 1);
2988         }
2989       }
2990       llvm::Value *ReductionDesc =
2991           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(),
2992                                                            LHSs, RHSs, Data);
2993       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2994       CGF.EmitVarDecl(*VD);
2995       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
2996                             /*Volatile=*/false, E->getType());
2997     }
2998     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2999   };
3000   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3001   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
3002 }
3003 
3004 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
3005   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
3006     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
3007       return llvm::makeArrayRef(FlushClause->varlist_begin(),
3008                                 FlushClause->varlist_end());
3009     }
3010     return llvm::None;
3011   }(), S.getLocStart());
3012 }
3013 
3014 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
3015                                             const CodeGenLoopTy &CodeGenLoop,
3016                                             Expr *IncExpr) {
3017   // Emit the loop iteration variable.
3018   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3019   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
3020   EmitVarDecl(*IVDecl);
3021 
3022   // Emit the iterations count variable.
3023   // If it is not a variable, Sema decided to calculate iterations count on each
3024   // iteration (e.g., it is foldable into a constant).
3025   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3026     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3027     // Emit calculation of the iterations count.
3028     EmitIgnoredExpr(S.getCalcLastIteration());
3029   }
3030 
3031   auto &RT = CGM.getOpenMPRuntime();
3032 
3033   bool HasLastprivateClause = false;
3034   // Check pre-condition.
3035   {
3036     OMPLoopScope PreInitScope(*this, S);
3037     // Skip the entire loop if we don't meet the precondition.
3038     // If the condition constant folds and can be elided, avoid emitting the
3039     // whole loop.
3040     bool CondConstant;
3041     llvm::BasicBlock *ContBlock = nullptr;
3042     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3043       if (!CondConstant)
3044         return;
3045     } else {
3046       auto *ThenBlock = createBasicBlock("omp.precond.then");
3047       ContBlock = createBasicBlock("omp.precond.end");
3048       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3049                   getProfileCount(&S));
3050       EmitBlock(ThenBlock);
3051       incrementProfileCounter(&S);
3052     }
3053 
3054     // Emit 'then' code.
3055     {
3056       // Emit helper vars inits.
3057 
3058       LValue LB = EmitOMPHelperVar(
3059           *this, cast<DeclRefExpr>(
3060                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3061                           ? S.getCombinedLowerBoundVariable()
3062                           : S.getLowerBoundVariable())));
3063       LValue UB = EmitOMPHelperVar(
3064           *this, cast<DeclRefExpr>(
3065                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3066                           ? S.getCombinedUpperBoundVariable()
3067                           : S.getUpperBoundVariable())));
3068       LValue ST =
3069           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3070       LValue IL =
3071           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3072 
3073       OMPPrivateScope LoopScope(*this);
3074       if (EmitOMPFirstprivateClause(S, LoopScope)) {
3075         // Emit implicit barrier to synchronize threads and avoid data races on
3076         // initialization of firstprivate variables and post-update of
3077         // lastprivate variables.
3078         CGM.getOpenMPRuntime().emitBarrierCall(
3079           *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
3080           /*ForceSimpleCall=*/true);
3081       }
3082       EmitOMPPrivateClause(S, LoopScope);
3083       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3084       EmitOMPPrivateLoopCounters(S, LoopScope);
3085       (void)LoopScope.Privatize();
3086 
3087       // Detect the distribute schedule kind and chunk.
3088       llvm::Value *Chunk = nullptr;
3089       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
3090       if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
3091         ScheduleKind = C->getDistScheduleKind();
3092         if (const auto *Ch = C->getChunkSize()) {
3093           Chunk = EmitScalarExpr(Ch);
3094           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
3095           S.getIterationVariable()->getType(),
3096           S.getLocStart());
3097         }
3098       }
3099       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3100       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3101 
3102       // OpenMP [2.10.8, distribute Construct, Description]
3103       // If dist_schedule is specified, kind must be static. If specified,
3104       // iterations are divided into chunks of size chunk_size, chunks are
3105       // assigned to the teams of the league in a round-robin fashion in the
3106       // order of the team number. When no chunk_size is specified, the
3107       // iteration space is divided into chunks that are approximately equal
3108       // in size, and at most one chunk is distributed to each team of the
3109       // league. The size of the chunks is unspecified in this case.
3110       if (RT.isStaticNonchunked(ScheduleKind,
3111                                 /* Chunked */ Chunk != nullptr)) {
3112         CGOpenMPRuntime::StaticRTInput StaticInit(
3113             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
3114             LB.getAddress(), UB.getAddress(), ST.getAddress());
3115         RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
3116                                     StaticInit);
3117         auto LoopExit =
3118             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3119         // UB = min(UB, GlobalUB);
3120         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3121                             ? S.getCombinedEnsureUpperBound()
3122                             : S.getEnsureUpperBound());
3123         // IV = LB;
3124         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3125                             ? S.getCombinedInit()
3126                             : S.getInit());
3127 
3128         Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3129                          ? S.getCombinedCond()
3130                          : S.getCond();
3131 
3132         // for distribute alone,  codegen
3133         // while (idx <= UB) { BODY; ++idx; }
3134         // when combined with 'for' (e.g. as in 'distribute parallel for')
3135         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
3136         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
3137                          [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
3138                            CodeGenLoop(CGF, S, LoopExit);
3139                          },
3140                          [](CodeGenFunction &) {});
3141         EmitBlock(LoopExit.getBlock());
3142         // Tell the runtime we are done.
3143         RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
3144       } else {
3145         // Emit the outer loop, which requests its work chunk [LB..UB] from
3146         // runtime and runs the inner loop to process it.
3147         const OMPLoopArguments LoopArguments = {
3148             LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
3149             Chunk};
3150         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
3151                                    CodeGenLoop);
3152       }
3153 
3154       // Emit final copy of the lastprivate variables if IsLastIter != 0.
3155       if (HasLastprivateClause)
3156         EmitOMPLastprivateClauseFinal(
3157             S, /*NoFinals=*/false,
3158             Builder.CreateIsNotNull(
3159                 EmitLoadOfScalar(IL, S.getLocStart())));
3160     }
3161 
3162     // We're now done with the loop, so jump to the continuation block.
3163     if (ContBlock) {
3164       EmitBranch(ContBlock);
3165       EmitBlock(ContBlock, true);
3166     }
3167   }
3168 }
3169 
3170 void CodeGenFunction::EmitOMPDistributeDirective(
3171     const OMPDistributeDirective &S) {
3172   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3173 
3174     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3175   };
3176   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3177   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
3178                                               false);
3179 }
3180 
3181 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3182                                                    const CapturedStmt *S) {
3183   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3184   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3185   CGF.CapturedStmtInfo = &CapStmtInfo;
3186   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3187   Fn->addFnAttr(llvm::Attribute::NoInline);
3188   return Fn;
3189 }
3190 
3191 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
3192   if (!S.getAssociatedStmt()) {
3193     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
3194       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
3195     return;
3196   }
3197   auto *C = S.getSingleClause<OMPSIMDClause>();
3198   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
3199                                  PrePostActionTy &Action) {
3200     if (C) {
3201       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3202       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3203       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3204       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
3205       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
3206                                                       OutlinedFn, CapturedVars);
3207     } else {
3208       Action.Enter(CGF);
3209       CGF.EmitStmt(
3210           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3211     }
3212   };
3213   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3214   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
3215 }
3216 
3217 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3218                                          QualType SrcType, QualType DestType,
3219                                          SourceLocation Loc) {
3220   assert(CGF.hasScalarEvaluationKind(DestType) &&
3221          "DestType must have scalar evaluation kind.");
3222   assert(!Val.isAggregate() && "Must be a scalar or complex.");
3223   return Val.isScalar()
3224              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
3225                                         Loc)
3226              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
3227                                                  DestType, Loc);
3228 }
3229 
3230 static CodeGenFunction::ComplexPairTy
3231 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3232                       QualType DestType, SourceLocation Loc) {
3233   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3234          "DestType must have complex evaluation kind.");
3235   CodeGenFunction::ComplexPairTy ComplexVal;
3236   if (Val.isScalar()) {
3237     // Convert the input element to the element type of the complex.
3238     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3239     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3240                                               DestElementType, Loc);
3241     ComplexVal = CodeGenFunction::ComplexPairTy(
3242         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3243   } else {
3244     assert(Val.isComplex() && "Must be a scalar or complex.");
3245     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3246     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3247     ComplexVal.first = CGF.EmitScalarConversion(
3248         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3249     ComplexVal.second = CGF.EmitScalarConversion(
3250         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3251   }
3252   return ComplexVal;
3253 }
3254 
3255 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3256                                   LValue LVal, RValue RVal) {
3257   if (LVal.isGlobalReg()) {
3258     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3259   } else {
3260     CGF.EmitAtomicStore(RVal, LVal,
3261                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3262                                  : llvm::AtomicOrdering::Monotonic,
3263                         LVal.isVolatile(), /*IsInit=*/false);
3264   }
3265 }
3266 
3267 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
3268                                          QualType RValTy, SourceLocation Loc) {
3269   switch (getEvaluationKind(LVal.getType())) {
3270   case TEK_Scalar:
3271     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
3272                                *this, RVal, RValTy, LVal.getType(), Loc)),
3273                            LVal);
3274     break;
3275   case TEK_Complex:
3276     EmitStoreOfComplex(
3277         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
3278         /*isInit=*/false);
3279     break;
3280   case TEK_Aggregate:
3281     llvm_unreachable("Must be a scalar or complex.");
3282   }
3283 }
3284 
3285 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3286                                   const Expr *X, const Expr *V,
3287                                   SourceLocation Loc) {
3288   // v = x;
3289   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3290   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3291   LValue XLValue = CGF.EmitLValue(X);
3292   LValue VLValue = CGF.EmitLValue(V);
3293   RValue Res = XLValue.isGlobalReg()
3294                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
3295                    : CGF.EmitAtomicLoad(
3296                          XLValue, Loc,
3297                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3298                                   : llvm::AtomicOrdering::Monotonic,
3299                          XLValue.isVolatile());
3300   // OpenMP, 2.12.6, atomic Construct
3301   // Any atomic construct with a seq_cst clause forces the atomically
3302   // performed operation to include an implicit flush operation without a
3303   // list.
3304   if (IsSeqCst)
3305     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3306   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3307 }
3308 
3309 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3310                                    const Expr *X, const Expr *E,
3311                                    SourceLocation Loc) {
3312   // x = expr;
3313   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3314   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3315   // OpenMP, 2.12.6, atomic Construct
3316   // Any atomic construct with a seq_cst clause forces the atomically
3317   // performed operation to include an implicit flush operation without a
3318   // list.
3319   if (IsSeqCst)
3320     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3321 }
3322 
3323 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
3324                                                 RValue Update,
3325                                                 BinaryOperatorKind BO,
3326                                                 llvm::AtomicOrdering AO,
3327                                                 bool IsXLHSInRHSPart) {
3328   auto &Context = CGF.CGM.getContext();
3329   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
3330   // expression is simple and atomic is allowed for the given type for the
3331   // target platform.
3332   if (BO == BO_Comma || !Update.isScalar() ||
3333       !Update.getScalarVal()->getType()->isIntegerTy() ||
3334       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
3335                         (Update.getScalarVal()->getType() !=
3336                          X.getAddress().getElementType())) ||
3337       !X.getAddress().getElementType()->isIntegerTy() ||
3338       !Context.getTargetInfo().hasBuiltinAtomic(
3339           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
3340     return std::make_pair(false, RValue::get(nullptr));
3341 
3342   llvm::AtomicRMWInst::BinOp RMWOp;
3343   switch (BO) {
3344   case BO_Add:
3345     RMWOp = llvm::AtomicRMWInst::Add;
3346     break;
3347   case BO_Sub:
3348     if (!IsXLHSInRHSPart)
3349       return std::make_pair(false, RValue::get(nullptr));
3350     RMWOp = llvm::AtomicRMWInst::Sub;
3351     break;
3352   case BO_And:
3353     RMWOp = llvm::AtomicRMWInst::And;
3354     break;
3355   case BO_Or:
3356     RMWOp = llvm::AtomicRMWInst::Or;
3357     break;
3358   case BO_Xor:
3359     RMWOp = llvm::AtomicRMWInst::Xor;
3360     break;
3361   case BO_LT:
3362     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3363                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
3364                                    : llvm::AtomicRMWInst::Max)
3365                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
3366                                    : llvm::AtomicRMWInst::UMax);
3367     break;
3368   case BO_GT:
3369     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3370                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
3371                                    : llvm::AtomicRMWInst::Min)
3372                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
3373                                    : llvm::AtomicRMWInst::UMin);
3374     break;
3375   case BO_Assign:
3376     RMWOp = llvm::AtomicRMWInst::Xchg;
3377     break;
3378   case BO_Mul:
3379   case BO_Div:
3380   case BO_Rem:
3381   case BO_Shl:
3382   case BO_Shr:
3383   case BO_LAnd:
3384   case BO_LOr:
3385     return std::make_pair(false, RValue::get(nullptr));
3386   case BO_PtrMemD:
3387   case BO_PtrMemI:
3388   case BO_LE:
3389   case BO_GE:
3390   case BO_EQ:
3391   case BO_NE:
3392   case BO_AddAssign:
3393   case BO_SubAssign:
3394   case BO_AndAssign:
3395   case BO_OrAssign:
3396   case BO_XorAssign:
3397   case BO_MulAssign:
3398   case BO_DivAssign:
3399   case BO_RemAssign:
3400   case BO_ShlAssign:
3401   case BO_ShrAssign:
3402   case BO_Comma:
3403     llvm_unreachable("Unsupported atomic update operation");
3404   }
3405   auto *UpdateVal = Update.getScalarVal();
3406   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3407     UpdateVal = CGF.Builder.CreateIntCast(
3408         IC, X.getAddress().getElementType(),
3409         X.getType()->hasSignedIntegerRepresentation());
3410   }
3411   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3412   return std::make_pair(true, RValue::get(Res));
3413 }
3414 
3415 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3416     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3417     llvm::AtomicOrdering AO, SourceLocation Loc,
3418     const llvm::function_ref<RValue(RValue)> &CommonGen) {
3419   // Update expressions are allowed to have the following forms:
3420   // x binop= expr; -> xrval + expr;
3421   // x++, ++x -> xrval + 1;
3422   // x--, --x -> xrval - 1;
3423   // x = x binop expr; -> xrval binop expr
3424   // x = expr Op x; - > expr binop xrval;
3425   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3426   if (!Res.first) {
3427     if (X.isGlobalReg()) {
3428       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3429       // 'xrval'.
3430       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3431     } else {
3432       // Perform compare-and-swap procedure.
3433       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3434     }
3435   }
3436   return Res;
3437 }
3438 
3439 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3440                                     const Expr *X, const Expr *E,
3441                                     const Expr *UE, bool IsXLHSInRHSPart,
3442                                     SourceLocation Loc) {
3443   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3444          "Update expr in 'atomic update' must be a binary operator.");
3445   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3446   // Update expressions are allowed to have the following forms:
3447   // x binop= expr; -> xrval + expr;
3448   // x++, ++x -> xrval + 1;
3449   // x--, --x -> xrval - 1;
3450   // x = x binop expr; -> xrval binop expr
3451   // x = expr Op x; - > expr binop xrval;
3452   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3453   LValue XLValue = CGF.EmitLValue(X);
3454   RValue ExprRValue = CGF.EmitAnyExpr(E);
3455   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3456                      : llvm::AtomicOrdering::Monotonic;
3457   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3458   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3459   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3460   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3461   auto Gen =
3462       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3463         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3464         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3465         return CGF.EmitAnyExpr(UE);
3466       };
3467   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3468       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3469   // OpenMP, 2.12.6, atomic Construct
3470   // Any atomic construct with a seq_cst clause forces the atomically
3471   // performed operation to include an implicit flush operation without a
3472   // list.
3473   if (IsSeqCst)
3474     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3475 }
3476 
3477 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3478                             QualType SourceType, QualType ResType,
3479                             SourceLocation Loc) {
3480   switch (CGF.getEvaluationKind(ResType)) {
3481   case TEK_Scalar:
3482     return RValue::get(
3483         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3484   case TEK_Complex: {
3485     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3486     return RValue::getComplex(Res.first, Res.second);
3487   }
3488   case TEK_Aggregate:
3489     break;
3490   }
3491   llvm_unreachable("Must be a scalar or complex.");
3492 }
3493 
3494 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
3495                                      bool IsPostfixUpdate, const Expr *V,
3496                                      const Expr *X, const Expr *E,
3497                                      const Expr *UE, bool IsXLHSInRHSPart,
3498                                      SourceLocation Loc) {
3499   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
3500   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
3501   RValue NewVVal;
3502   LValue VLValue = CGF.EmitLValue(V);
3503   LValue XLValue = CGF.EmitLValue(X);
3504   RValue ExprRValue = CGF.EmitAnyExpr(E);
3505   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3506                      : llvm::AtomicOrdering::Monotonic;
3507   QualType NewVValType;
3508   if (UE) {
3509     // 'x' is updated with some additional value.
3510     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3511            "Update expr in 'atomic capture' must be a binary operator.");
3512     auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3513     // Update expressions are allowed to have the following forms:
3514     // x binop= expr; -> xrval + expr;
3515     // x++, ++x -> xrval + 1;
3516     // x--, --x -> xrval - 1;
3517     // x = x binop expr; -> xrval binop expr
3518     // x = expr Op x; - > expr binop xrval;
3519     auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3520     auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3521     auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3522     NewVValType = XRValExpr->getType();
3523     auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3524     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
3525                   IsPostfixUpdate](RValue XRValue) -> RValue {
3526       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3527       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3528       RValue Res = CGF.EmitAnyExpr(UE);
3529       NewVVal = IsPostfixUpdate ? XRValue : Res;
3530       return Res;
3531     };
3532     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3533         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3534     if (Res.first) {
3535       // 'atomicrmw' instruction was generated.
3536       if (IsPostfixUpdate) {
3537         // Use old value from 'atomicrmw'.
3538         NewVVal = Res.second;
3539       } else {
3540         // 'atomicrmw' does not provide new value, so evaluate it using old
3541         // value of 'x'.
3542         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3543         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
3544         NewVVal = CGF.EmitAnyExpr(UE);
3545       }
3546     }
3547   } else {
3548     // 'x' is simply rewritten with some 'expr'.
3549     NewVValType = X->getType().getNonReferenceType();
3550     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
3551                                X->getType().getNonReferenceType(), Loc);
3552     auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
3553       NewVVal = XRValue;
3554       return ExprRValue;
3555     };
3556     // Try to perform atomicrmw xchg, otherwise simple exchange.
3557     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3558         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
3559         Loc, Gen);
3560     if (Res.first) {
3561       // 'atomicrmw' instruction was generated.
3562       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
3563     }
3564   }
3565   // Emit post-update store to 'v' of old/new 'x' value.
3566   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
3567   // OpenMP, 2.12.6, atomic Construct
3568   // Any atomic construct with a seq_cst clause forces the atomically
3569   // performed operation to include an implicit flush operation without a
3570   // list.
3571   if (IsSeqCst)
3572     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3573 }
3574 
3575 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
3576                               bool IsSeqCst, bool IsPostfixUpdate,
3577                               const Expr *X, const Expr *V, const Expr *E,
3578                               const Expr *UE, bool IsXLHSInRHSPart,
3579                               SourceLocation Loc) {
3580   switch (Kind) {
3581   case OMPC_read:
3582     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
3583     break;
3584   case OMPC_write:
3585     EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
3586     break;
3587   case OMPC_unknown:
3588   case OMPC_update:
3589     EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
3590     break;
3591   case OMPC_capture:
3592     EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
3593                              IsXLHSInRHSPart, Loc);
3594     break;
3595   case OMPC_if:
3596   case OMPC_final:
3597   case OMPC_num_threads:
3598   case OMPC_private:
3599   case OMPC_firstprivate:
3600   case OMPC_lastprivate:
3601   case OMPC_reduction:
3602   case OMPC_task_reduction:
3603   case OMPC_in_reduction:
3604   case OMPC_safelen:
3605   case OMPC_simdlen:
3606   case OMPC_collapse:
3607   case OMPC_default:
3608   case OMPC_seq_cst:
3609   case OMPC_shared:
3610   case OMPC_linear:
3611   case OMPC_aligned:
3612   case OMPC_copyin:
3613   case OMPC_copyprivate:
3614   case OMPC_flush:
3615   case OMPC_proc_bind:
3616   case OMPC_schedule:
3617   case OMPC_ordered:
3618   case OMPC_nowait:
3619   case OMPC_untied:
3620   case OMPC_threadprivate:
3621   case OMPC_depend:
3622   case OMPC_mergeable:
3623   case OMPC_device:
3624   case OMPC_threads:
3625   case OMPC_simd:
3626   case OMPC_map:
3627   case OMPC_num_teams:
3628   case OMPC_thread_limit:
3629   case OMPC_priority:
3630   case OMPC_grainsize:
3631   case OMPC_nogroup:
3632   case OMPC_num_tasks:
3633   case OMPC_hint:
3634   case OMPC_dist_schedule:
3635   case OMPC_defaultmap:
3636   case OMPC_uniform:
3637   case OMPC_to:
3638   case OMPC_from:
3639   case OMPC_use_device_ptr:
3640   case OMPC_is_device_ptr:
3641     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
3642   }
3643 }
3644 
3645 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3646   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3647   OpenMPClauseKind Kind = OMPC_unknown;
3648   for (auto *C : S.clauses()) {
3649     // Find first clause (skip seq_cst clause, if it is first).
3650     if (C->getClauseKind() != OMPC_seq_cst) {
3651       Kind = C->getClauseKind();
3652       break;
3653     }
3654   }
3655 
3656   const auto *CS =
3657       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
3658   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3659     enterFullExpression(EWC);
3660   }
3661   // Processing for statements under 'atomic capture'.
3662   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3663     for (const auto *C : Compound->body()) {
3664       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3665         enterFullExpression(EWC);
3666       }
3667     }
3668   }
3669 
3670   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3671                                             PrePostActionTy &) {
3672     CGF.EmitStopPoint(CS);
3673     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3674                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3675                       S.isXLHSInRHSPart(), S.getLocStart());
3676   };
3677   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3678   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3679 }
3680 
3681 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
3682                                          const OMPExecutableDirective &S,
3683                                          const RegionCodeGenTy &CodeGen) {
3684   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
3685   CodeGenModule &CGM = CGF.CGM;
3686   const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
3687 
3688   llvm::Function *Fn = nullptr;
3689   llvm::Constant *FnID = nullptr;
3690 
3691   const Expr *IfCond = nullptr;
3692   // Check for the at most one if clause associated with the target region.
3693   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3694     if (C->getNameModifier() == OMPD_unknown ||
3695         C->getNameModifier() == OMPD_target) {
3696       IfCond = C->getCondition();
3697       break;
3698     }
3699   }
3700 
3701   // Check if we have any device clause associated with the directive.
3702   const Expr *Device = nullptr;
3703   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3704     Device = C->getDevice();
3705   }
3706 
3707   // Check if we have an if clause whose conditional always evaluates to false
3708   // or if we do not have any targets specified. If so the target region is not
3709   // an offload entry point.
3710   bool IsOffloadEntry = true;
3711   if (IfCond) {
3712     bool Val;
3713     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3714       IsOffloadEntry = false;
3715   }
3716   if (CGM.getLangOpts().OMPTargetTriples.empty())
3717     IsOffloadEntry = false;
3718 
3719   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
3720   StringRef ParentName;
3721   // In case we have Ctors/Dtors we use the complete type variant to produce
3722   // the mangling of the device outlined kernel.
3723   if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
3724     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3725   else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
3726     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3727   else
3728     ParentName =
3729         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
3730 
3731   // Emit target region as a standalone region.
3732   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
3733                                                     IsOffloadEntry, CodeGen);
3734   OMPLexicalScope Scope(CGF, S);
3735   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3736   CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
3737   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
3738                                         CapturedVars);
3739 }
3740 
3741 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
3742                              PrePostActionTy &Action) {
3743   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3744   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3745   CGF.EmitOMPPrivateClause(S, PrivateScope);
3746   (void)PrivateScope.Privatize();
3747 
3748   Action.Enter(CGF);
3749   CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3750 }
3751 
3752 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
3753                                                   StringRef ParentName,
3754                                                   const OMPTargetDirective &S) {
3755   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3756     emitTargetRegion(CGF, S, Action);
3757   };
3758   llvm::Function *Fn;
3759   llvm::Constant *Addr;
3760   // Emit target region as a standalone region.
3761   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3762       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3763   assert(Fn && Addr && "Target device function emission failed.");
3764 }
3765 
3766 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3767   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3768     emitTargetRegion(CGF, S, Action);
3769   };
3770   emitCommonOMPTargetDirective(*this, S, CodeGen);
3771 }
3772 
3773 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3774                                         const OMPExecutableDirective &S,
3775                                         OpenMPDirectiveKind InnermostKind,
3776                                         const RegionCodeGenTy &CodeGen) {
3777   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
3778   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
3779       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3780 
3781   const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
3782   const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
3783   if (NT || TL) {
3784     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
3785     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
3786 
3787     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
3788                                                   S.getLocStart());
3789   }
3790 
3791   OMPTeamsScope Scope(CGF, S);
3792   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3793   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3794   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
3795                                            CapturedVars);
3796 }
3797 
3798 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
3799   // Emit teams region as a standalone region.
3800   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3801     OMPPrivateScope PrivateScope(CGF);
3802     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3803     CGF.EmitOMPPrivateClause(S, PrivateScope);
3804     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3805     (void)PrivateScope.Privatize();
3806     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3807     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3808   };
3809   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3810   emitPostUpdateForReductionClause(
3811       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
3812 }
3813 
3814 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
3815                                   const OMPTargetTeamsDirective &S) {
3816   auto *CS = S.getCapturedStmt(OMPD_teams);
3817   Action.Enter(CGF);
3818   auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
3819     // TODO: Add support for clauses.
3820     CGF.EmitStmt(CS->getCapturedStmt());
3821   };
3822   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
3823 }
3824 
3825 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
3826     CodeGenModule &CGM, StringRef ParentName,
3827     const OMPTargetTeamsDirective &S) {
3828   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3829     emitTargetTeamsRegion(CGF, Action, S);
3830   };
3831   llvm::Function *Fn;
3832   llvm::Constant *Addr;
3833   // Emit target region as a standalone region.
3834   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3835       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3836   assert(Fn && Addr && "Target device function emission failed.");
3837 }
3838 
3839 void CodeGenFunction::EmitOMPTargetTeamsDirective(
3840     const OMPTargetTeamsDirective &S) {
3841   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3842     emitTargetTeamsRegion(CGF, Action, S);
3843   };
3844   emitCommonOMPTargetDirective(*this, S, CodeGen);
3845 }
3846 
3847 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
3848     const OMPTeamsDistributeDirective &S) {
3849 
3850   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3851     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3852   };
3853 
3854   // Emit teams region as a standalone region.
3855   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
3856                                             PrePostActionTy &) {
3857     OMPPrivateScope PrivateScope(CGF);
3858     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3859     (void)PrivateScope.Privatize();
3860     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
3861                                                     CodeGenDistribute);
3862     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3863   };
3864   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3865   emitPostUpdateForReductionClause(*this, S,
3866                                    [](CodeGenFunction &) { return nullptr; });
3867 }
3868 
3869 void CodeGenFunction::EmitOMPCancellationPointDirective(
3870     const OMPCancellationPointDirective &S) {
3871   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
3872                                                    S.getCancelRegion());
3873 }
3874 
3875 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
3876   const Expr *IfCond = nullptr;
3877   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3878     if (C->getNameModifier() == OMPD_unknown ||
3879         C->getNameModifier() == OMPD_cancel) {
3880       IfCond = C->getCondition();
3881       break;
3882     }
3883   }
3884   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
3885                                         S.getCancelRegion());
3886 }
3887 
3888 CodeGenFunction::JumpDest
3889 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
3890   if (Kind == OMPD_parallel || Kind == OMPD_task ||
3891       Kind == OMPD_target_parallel)
3892     return ReturnBlock;
3893   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
3894          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
3895          Kind == OMPD_distribute_parallel_for ||
3896          Kind == OMPD_target_parallel_for);
3897   return OMPCancelStack.getExitBlock();
3898 }
3899 
3900 void CodeGenFunction::EmitOMPUseDevicePtrClause(
3901     const OMPClause &NC, OMPPrivateScope &PrivateScope,
3902     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
3903   const auto &C = cast<OMPUseDevicePtrClause>(NC);
3904   auto OrigVarIt = C.varlist_begin();
3905   auto InitIt = C.inits().begin();
3906   for (auto PvtVarIt : C.private_copies()) {
3907     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
3908     auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
3909     auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
3910 
3911     // In order to identify the right initializer we need to match the
3912     // declaration used by the mapping logic. In some cases we may get
3913     // OMPCapturedExprDecl that refers to the original declaration.
3914     const ValueDecl *MatchingVD = OrigVD;
3915     if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
3916       // OMPCapturedExprDecl are used to privative fields of the current
3917       // structure.
3918       auto *ME = cast<MemberExpr>(OED->getInit());
3919       assert(isa<CXXThisExpr>(ME->getBase()) &&
3920              "Base should be the current struct!");
3921       MatchingVD = ME->getMemberDecl();
3922     }
3923 
3924     // If we don't have information about the current list item, move on to
3925     // the next one.
3926     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
3927     if (InitAddrIt == CaptureDeviceAddrMap.end())
3928       continue;
3929 
3930     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
3931       // Initialize the temporary initialization variable with the address we
3932       // get from the runtime library. We have to cast the source address
3933       // because it is always a void *. References are materialized in the
3934       // privatization scope, so the initialization here disregards the fact
3935       // the original variable is a reference.
3936       QualType AddrQTy =
3937           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
3938       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
3939       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
3940       setAddrOfLocalVar(InitVD, InitAddr);
3941 
3942       // Emit private declaration, it will be initialized by the value we
3943       // declaration we just added to the local declarations map.
3944       EmitDecl(*PvtVD);
3945 
3946       // The initialization variables reached its purpose in the emission
3947       // ofthe previous declaration, so we don't need it anymore.
3948       LocalDeclMap.erase(InitVD);
3949 
3950       // Return the address of the private variable.
3951       return GetAddrOfLocalVar(PvtVD);
3952     });
3953     assert(IsRegistered && "firstprivate var already registered as private");
3954     // Silence the warning about unused variable.
3955     (void)IsRegistered;
3956 
3957     ++OrigVarIt;
3958     ++InitIt;
3959   }
3960 }
3961 
3962 // Generate the instructions for '#pragma omp target data' directive.
3963 void CodeGenFunction::EmitOMPTargetDataDirective(
3964     const OMPTargetDataDirective &S) {
3965   CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
3966 
3967   // Create a pre/post action to signal the privatization of the device pointer.
3968   // This action can be replaced by the OpenMP runtime code generation to
3969   // deactivate privatization.
3970   bool PrivatizeDevicePointers = false;
3971   class DevicePointerPrivActionTy : public PrePostActionTy {
3972     bool &PrivatizeDevicePointers;
3973 
3974   public:
3975     explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
3976         : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
3977     void Enter(CodeGenFunction &CGF) override {
3978       PrivatizeDevicePointers = true;
3979     }
3980   };
3981   DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
3982 
3983   auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
3984       CodeGenFunction &CGF, PrePostActionTy &Action) {
3985     auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3986       CGF.EmitStmt(
3987           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3988     };
3989 
3990     // Codegen that selects wheather to generate the privatization code or not.
3991     auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
3992                           &InnermostCodeGen](CodeGenFunction &CGF,
3993                                              PrePostActionTy &Action) {
3994       RegionCodeGenTy RCG(InnermostCodeGen);
3995       PrivatizeDevicePointers = false;
3996 
3997       // Call the pre-action to change the status of PrivatizeDevicePointers if
3998       // needed.
3999       Action.Enter(CGF);
4000 
4001       if (PrivatizeDevicePointers) {
4002         OMPPrivateScope PrivateScope(CGF);
4003         // Emit all instances of the use_device_ptr clause.
4004         for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
4005           CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
4006                                         Info.CaptureDeviceAddrMap);
4007         (void)PrivateScope.Privatize();
4008         RCG(CGF);
4009       } else
4010         RCG(CGF);
4011     };
4012 
4013     // Forward the provided action to the privatization codegen.
4014     RegionCodeGenTy PrivRCG(PrivCodeGen);
4015     PrivRCG.setAction(Action);
4016 
4017     // Notwithstanding the body of the region is emitted as inlined directive,
4018     // we don't use an inline scope as changes in the references inside the
4019     // region are expected to be visible outside, so we do not privative them.
4020     OMPLexicalScope Scope(CGF, S);
4021     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
4022                                                     PrivRCG);
4023   };
4024 
4025   RegionCodeGenTy RCG(CodeGen);
4026 
4027   // If we don't have target devices, don't bother emitting the data mapping
4028   // code.
4029   if (CGM.getLangOpts().OMPTargetTriples.empty()) {
4030     RCG(*this);
4031     return;
4032   }
4033 
4034   // Check if we have any if clause associated with the directive.
4035   const Expr *IfCond = nullptr;
4036   if (auto *C = S.getSingleClause<OMPIfClause>())
4037     IfCond = C->getCondition();
4038 
4039   // Check if we have any device clause associated with the directive.
4040   const Expr *Device = nullptr;
4041   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4042     Device = C->getDevice();
4043 
4044   // Set the action to signal privatization of device pointers.
4045   RCG.setAction(PrivAction);
4046 
4047   // Emit region code.
4048   CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
4049                                              Info);
4050 }
4051 
4052 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
4053     const OMPTargetEnterDataDirective &S) {
4054   // If we don't have target devices, don't bother emitting the data mapping
4055   // code.
4056   if (CGM.getLangOpts().OMPTargetTriples.empty())
4057     return;
4058 
4059   // Check if we have any if clause associated with the directive.
4060   const Expr *IfCond = nullptr;
4061   if (auto *C = S.getSingleClause<OMPIfClause>())
4062     IfCond = C->getCondition();
4063 
4064   // Check if we have any device clause associated with the directive.
4065   const Expr *Device = nullptr;
4066   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4067     Device = C->getDevice();
4068 
4069   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4070 }
4071 
4072 void CodeGenFunction::EmitOMPTargetExitDataDirective(
4073     const OMPTargetExitDataDirective &S) {
4074   // If we don't have target devices, don't bother emitting the data mapping
4075   // code.
4076   if (CGM.getLangOpts().OMPTargetTriples.empty())
4077     return;
4078 
4079   // Check if we have any if clause associated with the directive.
4080   const Expr *IfCond = nullptr;
4081   if (auto *C = S.getSingleClause<OMPIfClause>())
4082     IfCond = C->getCondition();
4083 
4084   // Check if we have any device clause associated with the directive.
4085   const Expr *Device = nullptr;
4086   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4087     Device = C->getDevice();
4088 
4089   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4090 }
4091 
4092 static void emitTargetParallelRegion(CodeGenFunction &CGF,
4093                                      const OMPTargetParallelDirective &S,
4094                                      PrePostActionTy &Action) {
4095   // Get the captured statement associated with the 'parallel' region.
4096   auto *CS = S.getCapturedStmt(OMPD_parallel);
4097   Action.Enter(CGF);
4098   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
4099     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4100     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4101     CGF.EmitOMPPrivateClause(S, PrivateScope);
4102     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4103     (void)PrivateScope.Privatize();
4104     // TODO: Add support for clauses.
4105     CGF.EmitStmt(CS->getCapturedStmt());
4106     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4107   };
4108   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
4109                                  emitEmptyBoundParameters);
4110   emitPostUpdateForReductionClause(
4111       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4112 }
4113 
4114 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4115     CodeGenModule &CGM, StringRef ParentName,
4116     const OMPTargetParallelDirective &S) {
4117   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4118     emitTargetParallelRegion(CGF, S, Action);
4119   };
4120   llvm::Function *Fn;
4121   llvm::Constant *Addr;
4122   // Emit target region as a standalone region.
4123   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4124       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4125   assert(Fn && Addr && "Target device function emission failed.");
4126 }
4127 
4128 void CodeGenFunction::EmitOMPTargetParallelDirective(
4129     const OMPTargetParallelDirective &S) {
4130   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4131     emitTargetParallelRegion(CGF, S, Action);
4132   };
4133   emitCommonOMPTargetDirective(*this, S, CodeGen);
4134 }
4135 
4136 void CodeGenFunction::EmitOMPTargetParallelForDirective(
4137     const OMPTargetParallelForDirective &S) {
4138   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
4139   CGM.getOpenMPRuntime().emitInlinedDirective(
4140       *this, OMPD_target_parallel_for,
4141       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4142         OMPLoopScope PreInitScope(CGF, S);
4143         CGF.EmitStmt(
4144             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
4145       });
4146 }
4147 
4148 /// Emit a helper variable and return corresponding lvalue.
4149 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
4150                      const ImplicitParamDecl *PVD,
4151                      CodeGenFunction::OMPPrivateScope &Privates) {
4152   auto *VDecl = cast<VarDecl>(Helper->getDecl());
4153   Privates.addPrivate(
4154       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
4155 }
4156 
/// Emit code for a taskloop-based loop directive.
///
/// Collects the task data from the directive's clauses ('if', 'nogroup',
/// 'grainsize', 'num_tasks'), defines a generator for the loop body and a
/// generator for the taskloop runtime call, and hands both to
/// EmitOMPTaskBasedDirective. Unless a 'nogroup' clause is present, that
/// emission is wrapped in a taskgroup region.
///
/// \param S Loop directive to emit; its kind must satisfy
/// isOpenMPTaskLoopDirective.
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Use the first 'if' clause that either has no directive-name modifier or
  // names 'taskloop'; IfCond stays null when there is no such clause.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop. The integer part of Data.Schedule tells the
  // two clauses apart (false for grainsize, true for num_tasks) and the
  // pointer part holds the emitted clause value. Neither is set when both
  // clauses are absent.
  if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  // Generator for the loop body of the task.
  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // Stays null when the precondition folds to a constant, in which case no
    // conditional branch is emitted.
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      // The precondition is known to be false: nothing is emitted at all.
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    // The enumerators are offsets into the parameter list of the captured
    // declaration at which the bound, stride and last-iteration parameters
    // are looked up.
    // NOTE(review): the starting offset 5 presumably has to match the
    // parameter layout of the captured taskloop region - confirm against the
    // code that builds that layout.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    // Map each of the directive's helper variables onto the corresponding
    // parameter of the captured declaration.
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Emit the loop itself: the first callback emits the loop body followed
    // by a stop point, the second callback is empty.
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    // The flag is loaded from the last-iteration parameter of the captured
    // declaration.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  // Generator for the taskloop runtime call; the call is emitted as an
  // inlined 'taskloop' directive.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  // With 'nogroup' the task is emitted directly; otherwise the same emission
  // is performed inside a taskgroup region.
  if (Data.Nogroup)
    EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
  else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
        },
        S.getLocStart());
  }
}
4295 
4296 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
4297   EmitOMPTaskLoopBasedDirective(S);
4298 }
4299 
4300 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
4301     const OMPTaskLoopSimdDirective &S) {
4302   EmitOMPTaskLoopBasedDirective(S);
4303 }
4304 
4305 // Generate the instructions for '#pragma omp target update' directive.
4306 void CodeGenFunction::EmitOMPTargetUpdateDirective(
4307     const OMPTargetUpdateDirective &S) {
4308   // If we don't have target devices, don't bother emitting the data mapping
4309   // code.
4310   if (CGM.getLangOpts().OMPTargetTriples.empty())
4311     return;
4312 
4313   // Check if we have any if clause associated with the directive.
4314   const Expr *IfCond = nullptr;
4315   if (auto *C = S.getSingleClause<OMPIfClause>())
4316     IfCond = C->getCondition();
4317 
4318   // Check if we have any device clause associated with the directive.
4319   const Expr *Device = nullptr;
4320   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4321     Device = C->getDevice();
4322 
4323   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4324 }
4325