1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
28 /// for captured expressions.
29 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
30   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
31     for (const auto *C : S.clauses()) {
32       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34           for (const auto *I : PreInit->decls()) {
35             if (!I->hasAttr<OMPCaptureNoInitAttr>())
36               CGF.EmitVarDecl(cast<VarDecl>(*I));
37             else {
38               CodeGenFunction::AutoVarEmission Emission =
39                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40               CGF.EmitAutoVarCleanups(Emission);
41             }
42           }
43         }
44       }
45     }
46   }
47   CodeGenFunction::OMPPrivateScope InlinedShareds;
48 
49   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
50     return CGF.LambdaCaptureFields.lookup(VD) ||
51            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
52            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
53   }
54 
55 public:
56   OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
57                   bool AsInlined = false, bool EmitPreInitStmt = true)
58       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
59         InlinedShareds(CGF) {
60     if (EmitPreInitStmt)
61       emitPreInitStmt(CGF, S);
62     if (AsInlined) {
63       if (S.hasAssociatedStmt()) {
64         auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
65         for (auto &C : CS->captures()) {
66           if (C.capturesVariable() || C.capturesVariableByCopy()) {
67             auto *VD = C.getCapturedVar();
68             assert(VD == VD->getCanonicalDecl() &&
69                         "Canonical decl must be captured.");
70             DeclRefExpr DRE(const_cast<VarDecl *>(VD),
71                             isCapturedVar(CGF, VD) ||
72                                 (CGF.CapturedStmtInfo &&
73                                  InlinedShareds.isGlobalVarCaptured(VD)),
74                             VD->getType().getNonReferenceType(), VK_LValue,
75                             SourceLocation());
76             InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
77               return CGF.EmitLValue(&DRE).getAddress();
78             });
79           }
80         }
81         (void)InlinedShareds.Privatize();
82       }
83     }
84   }
85 };
86 
87 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
88 /// for captured expressions.
89 class OMPParallelScope final : public OMPLexicalScope {
90   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
91     OpenMPDirectiveKind Kind = S.getDirectiveKind();
92     return !(isOpenMPTargetExecutionDirective(Kind) ||
93              isOpenMPLoopBoundSharingDirective(Kind)) &&
94            isOpenMPParallelDirective(Kind);
95   }
96 
97 public:
98   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
99       : OMPLexicalScope(CGF, S,
100                         /*AsInlined=*/false,
101                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
102 };
103 
104 /// Lexical scope for OpenMP teams construct, that handles correct codegen
105 /// for captured expressions.
106 class OMPTeamsScope final : public OMPLexicalScope {
107   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
108     OpenMPDirectiveKind Kind = S.getDirectiveKind();
109     return !isOpenMPTargetExecutionDirective(Kind) &&
110            isOpenMPTeamsDirective(Kind);
111   }
112 
113 public:
114   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
115       : OMPLexicalScope(CGF, S,
116                         /*AsInlined=*/false,
117                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
118 };
119 
120 /// Private scope for OpenMP loop-based directives, that supports capturing
121 /// of used expression from loop statement.
122 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
123   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
124     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
125       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
126         for (const auto *I : PreInits->decls())
127           CGF.EmitVarDecl(cast<VarDecl>(*I));
128       }
129     }
130   }
131 
132 public:
133   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
134       : CodeGenFunction::RunCleanupsScope(CGF) {
135     emitPreInitStmt(CGF, S);
136   }
137 };
138 
139 } // namespace
140 
141 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
142                                          const OMPExecutableDirective &S,
143                                          const RegionCodeGenTy &CodeGen);
144 
145 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
146   if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
147     if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
148       OrigVD = OrigVD->getCanonicalDecl();
149       bool IsCaptured =
150           LambdaCaptureFields.lookup(OrigVD) ||
151           (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
152           (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
153       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured,
154                       OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
155       return EmitLValue(&DRE);
156     }
157   }
158   return EmitLValue(E);
159 }
160 
161 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
162   auto &C = getContext();
163   llvm::Value *Size = nullptr;
164   auto SizeInChars = C.getTypeSizeInChars(Ty);
165   if (SizeInChars.isZero()) {
166     // getTypeSizeInChars() returns 0 for a VLA.
167     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
168       llvm::Value *ArraySize;
169       std::tie(ArraySize, Ty) = getVLASize(VAT);
170       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
171     }
172     SizeInChars = C.getTypeSizeInChars(Ty);
173     if (SizeInChars.isZero())
174       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
175     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
176   } else
177     Size = CGM.getSize(SizeInChars);
178   return Size;
179 }
180 
181 void CodeGenFunction::GenerateOpenMPCapturedVars(
182     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
183   const RecordDecl *RD = S.getCapturedRecordDecl();
184   auto CurField = RD->field_begin();
185   auto CurCap = S.captures().begin();
186   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
187                                                  E = S.capture_init_end();
188        I != E; ++I, ++CurField, ++CurCap) {
189     if (CurField->hasCapturedVLAType()) {
190       auto VAT = CurField->getCapturedVLAType();
191       auto *Val = VLASizeMap[VAT->getSizeExpr()];
192       CapturedVars.push_back(Val);
193     } else if (CurCap->capturesThis())
194       CapturedVars.push_back(CXXThisValue);
195     else if (CurCap->capturesVariableByCopy()) {
196       llvm::Value *CV =
197           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();
198 
199       // If the field is not a pointer, we need to save the actual value
200       // and load it as a void pointer.
201       if (!CurField->getType()->isAnyPointerType()) {
202         auto &Ctx = getContext();
203         auto DstAddr = CreateMemTemp(
204             Ctx.getUIntPtrType(),
205             Twine(CurCap->getCapturedVar()->getName()) + ".casted");
206         LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
207 
208         auto *SrcAddrVal = EmitScalarConversion(
209             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
210             Ctx.getPointerType(CurField->getType()), SourceLocation());
211         LValue SrcLV =
212             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
213 
214         // Store the value using the source type pointer.
215         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
216 
217         // Load the value using the destination type pointer.
218         CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
219       }
220       CapturedVars.push_back(CV);
221     } else {
222       assert(CurCap->capturesVariable() && "Expected capture by reference.");
223       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
224     }
225   }
226 }
227 
228 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
229                                     StringRef Name, LValue AddrLV,
230                                     bool isReferenceType = false) {
231   ASTContext &Ctx = CGF.getContext();
232 
233   auto *CastedPtr = CGF.EmitScalarConversion(
234       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
235       Ctx.getPointerType(DstType), SourceLocation());
236   auto TmpAddr =
237       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
238           .getAddress();
239 
240   // If we are dealing with references we need to return the address of the
241   // reference instead of the reference of the value.
242   if (isReferenceType) {
243     QualType RefType = Ctx.getLValueReferenceType(DstType);
244     auto *RefVal = TmpAddr.getPointer();
245     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
246     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
247     CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
248   }
249 
250   return TmpAddr;
251 }
252 
253 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
254   if (T->isLValueReferenceType()) {
255     return C.getLValueReferenceType(
256         getCanonicalParamType(C, T.getNonReferenceType()),
257         /*SpelledAsLValue=*/false);
258   }
259   if (T->isPointerType())
260     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
261   if (auto *A = T->getAsArrayTypeUnsafe()) {
262     if (auto *VLA = dyn_cast<VariableArrayType>(A))
263       return getCanonicalParamType(C, VLA->getElementType());
264     else if (!A->isVariablyModifiedType())
265       return C.getCanonicalType(T);
266   }
267   return C.getCanonicalParamType(T);
268 }
269 
270 namespace {
271   /// Contains required data for proper outlined function codegen.
272   struct FunctionOptions {
273     /// Captured statement for which the function is generated.
274     const CapturedStmt *S = nullptr;
275     /// true if cast to/from  UIntPtr is required for variables captured by
276     /// value.
277     const bool UIntPtrCastRequired = true;
278     /// true if only casted arguments must be registered as local args or VLA
279     /// sizes.
280     const bool RegisterCastedArgsOnly = false;
281     /// Name of the generated function.
282     const StringRef FunctionName;
283     explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
284                              bool RegisterCastedArgsOnly,
285                              StringRef FunctionName)
286         : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
287           RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
288           FunctionName(FunctionName) {}
289   };
290 }
291 
292 static llvm::Function *emitOutlinedFunctionPrologue(
293     CodeGenFunction &CGF, FunctionArgList &Args,
294     llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
295         &LocalAddrs,
296     llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
297         &VLASizes,
298     llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
299   const CapturedDecl *CD = FO.S->getCapturedDecl();
300   const RecordDecl *RD = FO.S->getCapturedRecordDecl();
301   assert(CD->hasBody() && "missing CapturedDecl body");
302 
303   CXXThisValue = nullptr;
304   // Build the argument list.
305   CodeGenModule &CGM = CGF.CGM;
306   ASTContext &Ctx = CGM.getContext();
307   FunctionArgList TargetArgs;
308   Args.append(CD->param_begin(),
309               std::next(CD->param_begin(), CD->getContextParamPosition()));
310   TargetArgs.append(
311       CD->param_begin(),
312       std::next(CD->param_begin(), CD->getContextParamPosition()));
313   auto I = FO.S->captures().begin();
314   FunctionDecl *DebugFunctionDecl = nullptr;
315   if (!FO.UIntPtrCastRequired) {
316     FunctionProtoType::ExtProtoInfo EPI;
317     DebugFunctionDecl = FunctionDecl::Create(
318         Ctx, Ctx.getTranslationUnitDecl(), FO.S->getLocStart(),
319         SourceLocation(), DeclarationName(), Ctx.VoidTy,
320         Ctx.getTrivialTypeSourceInfo(
321             Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)),
322         SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
323   }
324   for (auto *FD : RD->fields()) {
325     QualType ArgType = FD->getType();
326     IdentifierInfo *II = nullptr;
327     VarDecl *CapVar = nullptr;
328 
329     // If this is a capture by copy and the type is not a pointer, the outlined
330     // function argument type should be uintptr and the value properly casted to
331     // uintptr. This is necessary given that the runtime library is only able to
332     // deal with pointers. We can pass in the same way the VLA type sizes to the
333     // outlined function.
334     if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
335         I->capturesVariableArrayType()) {
336       if (FO.UIntPtrCastRequired)
337         ArgType = Ctx.getUIntPtrType();
338     }
339 
340     if (I->capturesVariable() || I->capturesVariableByCopy()) {
341       CapVar = I->getCapturedVar();
342       II = CapVar->getIdentifier();
343     } else if (I->capturesThis())
344       II = &Ctx.Idents.get("this");
345     else {
346       assert(I->capturesVariableArrayType());
347       II = &Ctx.Idents.get("vla");
348     }
349     if (ArgType->isVariablyModifiedType())
350       ArgType = getCanonicalParamType(Ctx, ArgType);
351     VarDecl *Arg;
352     if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
353       Arg = ParmVarDecl::Create(
354           Ctx, DebugFunctionDecl,
355           CapVar ? CapVar->getLocStart() : FD->getLocStart(),
356           CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
357           /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
358     } else {
359       Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
360                                       II, ArgType, ImplicitParamDecl::Other);
361     }
362     Args.emplace_back(Arg);
363     // Do not cast arguments if we emit function with non-original types.
364     TargetArgs.emplace_back(
365         FO.UIntPtrCastRequired
366             ? Arg
367             : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
368     ++I;
369   }
370   Args.append(
371       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
372       CD->param_end());
373   TargetArgs.append(
374       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
375       CD->param_end());
376 
377   // Create the function declaration.
378   const CGFunctionInfo &FuncInfo =
379       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
380   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
381 
382   llvm::Function *F =
383       llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
384                              FO.FunctionName, &CGM.getModule());
385   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
386   if (CD->isNothrow())
387     F->setDoesNotThrow();
388 
389   // Generate the function.
390   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
391                     FO.S->getLocStart(), CD->getBody()->getLocStart());
392   unsigned Cnt = CD->getContextParamPosition();
393   I = FO.S->captures().begin();
394   for (auto *FD : RD->fields()) {
395     // Do not map arguments if we emit function with non-original types.
396     Address LocalAddr(Address::invalid());
397     if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
398       LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
399                                                              TargetArgs[Cnt]);
400     } else {
401       LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
402     }
403     // If we are capturing a pointer by copy we don't need to do anything, just
404     // use the value that we get from the arguments.
405     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
406       const VarDecl *CurVD = I->getCapturedVar();
407       // If the variable is a reference we need to materialize it here.
408       if (CurVD->getType()->isReferenceType()) {
409         Address RefAddr = CGF.CreateMemTemp(
410             CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
411         CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
412                               /*Volatile=*/false, CurVD->getType());
413         LocalAddr = RefAddr;
414       }
415       if (!FO.RegisterCastedArgsOnly)
416         LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
417       ++Cnt;
418       ++I;
419       continue;
420     }
421 
422     LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
423                                         AlignmentSource::Decl);
424     if (FD->hasCapturedVLAType()) {
425       if (FO.UIntPtrCastRequired) {
426         ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
427                                                           Args[Cnt]->getName(),
428                                                           ArgLVal),
429                                      FD->getType(), AlignmentSource::Decl);
430       }
431       auto *ExprArg =
432           CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
433       auto VAT = FD->getCapturedVLAType();
434       VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
435     } else if (I->capturesVariable()) {
436       auto *Var = I->getCapturedVar();
437       QualType VarTy = Var->getType();
438       Address ArgAddr = ArgLVal.getAddress();
439       if (!VarTy->isReferenceType()) {
440         if (ArgLVal.getType()->isLValueReferenceType()) {
441           ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
442         } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
443           assert(ArgLVal.getType()->isPointerType());
444           ArgAddr = CGF.EmitLoadOfPointer(
445               ArgAddr, ArgLVal.getType()->castAs<PointerType>());
446         }
447       }
448       if (!FO.RegisterCastedArgsOnly) {
449         LocalAddrs.insert(
450             {Args[Cnt],
451              {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
452       }
453     } else if (I->capturesVariableByCopy()) {
454       assert(!FD->getType()->isAnyPointerType() &&
455              "Not expecting a captured pointer.");
456       auto *Var = I->getCapturedVar();
457       QualType VarTy = Var->getType();
458       LocalAddrs.insert(
459           {Args[Cnt],
460            {Var,
461             FO.UIntPtrCastRequired
462                 ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
463                                        ArgLVal, VarTy->isReferenceType())
464                 : ArgLVal.getAddress()}});
465     } else {
466       // If 'this' is captured, load it into CXXThisValue.
467       assert(I->capturesThis());
468       CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
469                          .getScalarVal();
470       LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
471     }
472     ++Cnt;
473     ++I;
474   }
475 
476   return F;
477 }
478 
479 llvm::Function *
480 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
481   assert(
482       CapturedStmtInfo &&
483       "CapturedStmtInfo should be set when generating the captured function");
484   const CapturedDecl *CD = S.getCapturedDecl();
485   // Build the argument list.
486   bool NeedWrapperFunction =
487       getDebugInfo() &&
488       CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
489   FunctionArgList Args;
490   llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
491   llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
492   SmallString<256> Buffer;
493   llvm::raw_svector_ostream Out(Buffer);
494   Out << CapturedStmtInfo->getHelperName();
495   if (NeedWrapperFunction)
496     Out << "_debug__";
497   FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
498                      Out.str());
499   llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
500                                                    VLASizes, CXXThisValue, FO);
501   for (const auto &LocalAddrPair : LocalAddrs) {
502     if (LocalAddrPair.second.first) {
503       setAddrOfLocalVar(LocalAddrPair.second.first,
504                         LocalAddrPair.second.second);
505     }
506   }
507   for (const auto &VLASizePair : VLASizes)
508     VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
509   PGO.assignRegionCounters(GlobalDecl(CD), F);
510   CapturedStmtInfo->EmitBody(*this, CD->getBody());
511   FinishFunction(CD->getBodyRBrace());
512   if (!NeedWrapperFunction)
513     return F;
514 
515   FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
516                             /*RegisterCastedArgsOnly=*/true,
517                             CapturedStmtInfo->getHelperName());
518   CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
519   Args.clear();
520   LocalAddrs.clear();
521   VLASizes.clear();
522   llvm::Function *WrapperF =
523       emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
524                                    WrapperCGF.CXXThisValue, WrapperFO);
525   llvm::SmallVector<llvm::Value *, 4> CallArgs;
526   for (const auto *Arg : Args) {
527     llvm::Value *CallArg;
528     auto I = LocalAddrs.find(Arg);
529     if (I != LocalAddrs.end()) {
530       LValue LV = WrapperCGF.MakeAddrLValue(
531           I->second.second,
532           I->second.first ? I->second.first->getType() : Arg->getType(),
533           AlignmentSource::Decl);
534       CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
535     } else {
536       auto EI = VLASizes.find(Arg);
537       if (EI != VLASizes.end())
538         CallArg = EI->second.second;
539       else {
540         LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
541                                               Arg->getType(),
542                                               AlignmentSource::Decl);
543         CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
544       }
545     }
546     CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
547   }
548   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
549                                                   F, CallArgs);
550   WrapperCGF.FinishFunction();
551   return WrapperF;
552 }
553 
554 //===----------------------------------------------------------------------===//
555 //                              OpenMP Directive Emission
556 //===----------------------------------------------------------------------===//
557 void CodeGenFunction::EmitOMPAggregateAssign(
558     Address DestAddr, Address SrcAddr, QualType OriginalType,
559     const llvm::function_ref<void(Address, Address)> &CopyGen) {
560   // Perform element-by-element initialization.
561   QualType ElementTy;
562 
563   // Drill down to the base element type on both arrays.
564   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
565   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
566   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
567 
568   auto SrcBegin = SrcAddr.getPointer();
569   auto DestBegin = DestAddr.getPointer();
570   // Cast from pointer to array type to pointer to single element.
571   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
572   // The basic structure here is a while-do loop.
573   auto BodyBB = createBasicBlock("omp.arraycpy.body");
574   auto DoneBB = createBasicBlock("omp.arraycpy.done");
575   auto IsEmpty =
576       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
577   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
578 
579   // Enter the loop body, making that address the current address.
580   auto EntryBB = Builder.GetInsertBlock();
581   EmitBlock(BodyBB);
582 
583   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
584 
585   llvm::PHINode *SrcElementPHI =
586     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
587   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
588   Address SrcElementCurrent =
589       Address(SrcElementPHI,
590               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
591 
592   llvm::PHINode *DestElementPHI =
593     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
594   DestElementPHI->addIncoming(DestBegin, EntryBB);
595   Address DestElementCurrent =
596     Address(DestElementPHI,
597             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
598 
599   // Emit copy.
600   CopyGen(DestElementCurrent, SrcElementCurrent);
601 
602   // Shift the address forward by one element.
603   auto DestElementNext = Builder.CreateConstGEP1_32(
604       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
605   auto SrcElementNext = Builder.CreateConstGEP1_32(
606       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
607   // Check whether we've reached the end.
608   auto Done =
609       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
610   Builder.CreateCondBr(Done, DoneBB, BodyBB);
611   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
612   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
613 
614   // Done.
615   EmitBlock(DoneBB, /*IsFinished=*/true);
616 }
617 
618 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
619                                   Address SrcAddr, const VarDecl *DestVD,
620                                   const VarDecl *SrcVD, const Expr *Copy) {
621   if (OriginalType->isArrayType()) {
622     auto *BO = dyn_cast<BinaryOperator>(Copy);
623     if (BO && BO->getOpcode() == BO_Assign) {
624       // Perform simple memcpy for simple copying.
625       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
626     } else {
627       // For arrays with complex element types perform element by element
628       // copying.
629       EmitOMPAggregateAssign(
630           DestAddr, SrcAddr, OriginalType,
631           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
632             // Working with the single array element, so have to remap
633             // destination and source variables to corresponding array
634             // elements.
635             CodeGenFunction::OMPPrivateScope Remap(*this);
636             Remap.addPrivate(DestVD, [DestElement]() -> Address {
637               return DestElement;
638             });
639             Remap.addPrivate(
640                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
641             (void)Remap.Privatize();
642             EmitIgnoredExpr(Copy);
643           });
644     }
645   } else {
646     // Remap pseudo source variable to private copy.
647     CodeGenFunction::OMPPrivateScope Remap(*this);
648     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
649     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
650     (void)Remap.Privatize();
651     // Emit copying of the whole variable.
652     EmitIgnoredExpr(Copy);
653   }
654 }
655 
656 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
657                                                 OMPPrivateScope &PrivateScope) {
658   if (!HaveInsertPoint())
659     return false;
660   bool FirstprivateIsLastprivate = false;
661   llvm::DenseSet<const VarDecl *> Lastprivates;
662   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
663     for (const auto *D : C->varlists())
664       Lastprivates.insert(
665           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
666   }
667   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
668   CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
669   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
670     auto IRef = C->varlist_begin();
671     auto InitsRef = C->inits().begin();
672     for (auto IInit : C->private_copies()) {
673       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
674       bool ThisFirstprivateIsLastprivate =
675           Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
676       auto *CapFD = CapturesInfo.lookup(OrigVD);
677       auto *FD = CapturedStmtInfo->lookup(OrigVD);
678       if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
679           !FD->getType()->isReferenceType()) {
680         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
681         ++IRef;
682         ++InitsRef;
683         continue;
684       }
685       FirstprivateIsLastprivate =
686           FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
687       if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
688         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
689         auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
690         bool IsRegistered;
691         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
692                         /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
693                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
694         Address OriginalAddr = EmitLValue(&DRE).getAddress();
695         QualType Type = VD->getType();
696         if (Type->isArrayType()) {
697           // Emit VarDecl with copy init for arrays.
698           // Get the address of the original variable captured in current
699           // captured region.
700           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
701             auto Emission = EmitAutoVarAlloca(*VD);
702             auto *Init = VD->getInit();
703             if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
704               // Perform simple memcpy.
705               EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
706                                   Type);
707             } else {
708               EmitOMPAggregateAssign(
709                   Emission.getAllocatedAddress(), OriginalAddr, Type,
710                   [this, VDInit, Init](Address DestElement,
711                                        Address SrcElement) {
712                     // Clean up any temporaries needed by the initialization.
713                     RunCleanupsScope InitScope(*this);
714                     // Emit initialization for single element.
715                     setAddrOfLocalVar(VDInit, SrcElement);
716                     EmitAnyExprToMem(Init, DestElement,
717                                      Init->getType().getQualifiers(),
718                                      /*IsInitializer*/ false);
719                     LocalDeclMap.erase(VDInit);
720                   });
721             }
722             EmitAutoVarCleanups(Emission);
723             return Emission.getAllocatedAddress();
724           });
725         } else {
726           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
727             // Emit private VarDecl with copy init.
728             // Remap temp VDInit variable to the address of the original
729             // variable
730             // (for proper handling of captured global variables).
731             setAddrOfLocalVar(VDInit, OriginalAddr);
732             EmitDecl(*VD);
733             LocalDeclMap.erase(VDInit);
734             return GetAddrOfLocalVar(VD);
735           });
736         }
737         assert(IsRegistered &&
738                "firstprivate var already registered as private");
739         // Silence the warning about unused variable.
740         (void)IsRegistered;
741       }
742       ++IRef;
743       ++InitsRef;
744     }
745   }
746   return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
747 }
748 
749 void CodeGenFunction::EmitOMPPrivateClause(
750     const OMPExecutableDirective &D,
751     CodeGenFunction::OMPPrivateScope &PrivateScope) {
752   if (!HaveInsertPoint())
753     return;
754   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
755   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
756     auto IRef = C->varlist_begin();
757     for (auto IInit : C->private_copies()) {
758       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
759       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
760         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
761         bool IsRegistered =
762             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
763               // Emit private VarDecl with copy init.
764               EmitDecl(*VD);
765               return GetAddrOfLocalVar(VD);
766             });
767         assert(IsRegistered && "private var already registered as private");
768         // Silence the warning about unused variable.
769         (void)IsRegistered;
770       }
771       ++IRef;
772     }
773   }
774 }
775 
776 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
777   if (!HaveInsertPoint())
778     return false;
779   // threadprivate_var1 = master_threadprivate_var1;
780   // operator=(threadprivate_var2, master_threadprivate_var2);
781   // ...
782   // __kmpc_barrier(&loc, global_tid);
783   llvm::DenseSet<const VarDecl *> CopiedVars;
784   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
785   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
786     auto IRef = C->varlist_begin();
787     auto ISrcRef = C->source_exprs().begin();
788     auto IDestRef = C->destination_exprs().begin();
789     for (auto *AssignOp : C->assignment_ops()) {
790       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
791       QualType Type = VD->getType();
792       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
793         // Get the address of the master variable. If we are emitting code with
794         // TLS support, the address is passed from the master as field in the
795         // captured declaration.
796         Address MasterAddr = Address::invalid();
797         if (getLangOpts().OpenMPUseTLS &&
798             getContext().getTargetInfo().isTLSSupported()) {
799           assert(CapturedStmtInfo->lookup(VD) &&
800                  "Copyin threadprivates should have been captured!");
801           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
802                           VK_LValue, (*IRef)->getExprLoc());
803           MasterAddr = EmitLValue(&DRE).getAddress();
804           LocalDeclMap.erase(VD);
805         } else {
806           MasterAddr =
807             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
808                                         : CGM.GetAddrOfGlobal(VD),
809                     getContext().getDeclAlign(VD));
810         }
811         // Get the address of the threadprivate variable.
812         Address PrivateAddr = EmitLValue(*IRef).getAddress();
813         if (CopiedVars.size() == 1) {
814           // At first check if current thread is a master thread. If it is, no
815           // need to copy data.
816           CopyBegin = createBasicBlock("copyin.not.master");
817           CopyEnd = createBasicBlock("copyin.not.master.end");
818           Builder.CreateCondBr(
819               Builder.CreateICmpNE(
820                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
821                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
822               CopyBegin, CopyEnd);
823           EmitBlock(CopyBegin);
824         }
825         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
826         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
827         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
828       }
829       ++IRef;
830       ++ISrcRef;
831       ++IDestRef;
832     }
833   }
834   if (CopyEnd) {
835     // Exit out of copying procedure for non-master thread.
836     EmitBlock(CopyEnd, /*IsFinished=*/true);
837     return true;
838   }
839   return false;
840 }
841 
842 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
843     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
844   if (!HaveInsertPoint())
845     return false;
846   bool HasAtLeastOneLastprivate = false;
847   llvm::DenseSet<const VarDecl *> SIMDLCVs;
848   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
849     auto *LoopDirective = cast<OMPLoopDirective>(&D);
850     for (auto *C : LoopDirective->counters()) {
851       SIMDLCVs.insert(
852           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
853     }
854   }
855   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
856   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
857     HasAtLeastOneLastprivate = true;
858     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
859       break;
860     auto IRef = C->varlist_begin();
861     auto IDestRef = C->destination_exprs().begin();
862     for (auto *IInit : C->private_copies()) {
863       // Keep the address of the original variable for future update at the end
864       // of the loop.
865       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
866       // Taskloops do not require additional initialization, it is done in
867       // runtime support library.
868       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
869         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
870         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
871           DeclRefExpr DRE(
872               const_cast<VarDecl *>(OrigVD),
873               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
874                   OrigVD) != nullptr,
875               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
876           return EmitLValue(&DRE).getAddress();
877         });
878         // Check if the variable is also a firstprivate: in this case IInit is
879         // not generated. Initialization of this variable will happen in codegen
880         // for 'firstprivate' clause.
881         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
882           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
883           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
884             // Emit private VarDecl with copy init.
885             EmitDecl(*VD);
886             return GetAddrOfLocalVar(VD);
887           });
888           assert(IsRegistered &&
889                  "lastprivate var already registered as private");
890           (void)IsRegistered;
891         }
892       }
893       ++IRef;
894       ++IDestRef;
895     }
896   }
897   return HasAtLeastOneLastprivate;
898 }
899 
900 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
901     const OMPExecutableDirective &D, bool NoFinals,
902     llvm::Value *IsLastIterCond) {
903   if (!HaveInsertPoint())
904     return;
905   // Emit following code:
906   // if (<IsLastIterCond>) {
907   //   orig_var1 = private_orig_var1;
908   //   ...
909   //   orig_varn = private_orig_varn;
910   // }
911   llvm::BasicBlock *ThenBB = nullptr;
912   llvm::BasicBlock *DoneBB = nullptr;
913   if (IsLastIterCond) {
914     ThenBB = createBasicBlock(".omp.lastprivate.then");
915     DoneBB = createBasicBlock(".omp.lastprivate.done");
916     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
917     EmitBlock(ThenBB);
918   }
919   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
920   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
921   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
922     auto IC = LoopDirective->counters().begin();
923     for (auto F : LoopDirective->finals()) {
924       auto *D =
925           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
926       if (NoFinals)
927         AlreadyEmittedVars.insert(D);
928       else
929         LoopCountersAndUpdates[D] = F;
930       ++IC;
931     }
932   }
933   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
934     auto IRef = C->varlist_begin();
935     auto ISrcRef = C->source_exprs().begin();
936     auto IDestRef = C->destination_exprs().begin();
937     for (auto *AssignOp : C->assignment_ops()) {
938       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
939       QualType Type = PrivateVD->getType();
940       auto *CanonicalVD = PrivateVD->getCanonicalDecl();
941       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
942         // If lastprivate variable is a loop control variable for loop-based
943         // directive, update its value before copyin back to original
944         // variable.
945         if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
946           EmitIgnoredExpr(FinalExpr);
947         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
948         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
949         // Get the address of the original variable.
950         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
951         // Get the address of the private variable.
952         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
953         if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
954           PrivateAddr =
955               Address(Builder.CreateLoad(PrivateAddr),
956                       getNaturalTypeAlignment(RefTy->getPointeeType()));
957         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
958       }
959       ++IRef;
960       ++ISrcRef;
961       ++IDestRef;
962     }
963     if (auto *PostUpdate = C->getPostUpdateExpr())
964       EmitIgnoredExpr(PostUpdate);
965   }
966   if (IsLastIterCond)
967     EmitBlock(DoneBB, /*IsFinished=*/true);
968 }
969 
970 void CodeGenFunction::EmitOMPReductionClauseInit(
971     const OMPExecutableDirective &D,
972     CodeGenFunction::OMPPrivateScope &PrivateScope) {
973   if (!HaveInsertPoint())
974     return;
975   SmallVector<const Expr *, 4> Shareds;
976   SmallVector<const Expr *, 4> Privates;
977   SmallVector<const Expr *, 4> ReductionOps;
978   SmallVector<const Expr *, 4> LHSs;
979   SmallVector<const Expr *, 4> RHSs;
980   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
981     auto IPriv = C->privates().begin();
982     auto IRed = C->reduction_ops().begin();
983     auto ILHS = C->lhs_exprs().begin();
984     auto IRHS = C->rhs_exprs().begin();
985     for (const auto *Ref : C->varlists()) {
986       Shareds.emplace_back(Ref);
987       Privates.emplace_back(*IPriv);
988       ReductionOps.emplace_back(*IRed);
989       LHSs.emplace_back(*ILHS);
990       RHSs.emplace_back(*IRHS);
991       std::advance(IPriv, 1);
992       std::advance(IRed, 1);
993       std::advance(ILHS, 1);
994       std::advance(IRHS, 1);
995     }
996   }
997   ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
998   unsigned Count = 0;
999   auto ILHS = LHSs.begin();
1000   auto IRHS = RHSs.begin();
1001   auto IPriv = Privates.begin();
1002   for (const auto *IRef : Shareds) {
1003     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1004     // Emit private VarDecl with reduction init.
1005     RedCG.emitSharedLValue(*this, Count);
1006     RedCG.emitAggregateType(*this, Count);
1007     auto Emission = EmitAutoVarAlloca(*PrivateVD);
1008     RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1009                              RedCG.getSharedLValue(Count),
1010                              [&Emission](CodeGenFunction &CGF) {
1011                                CGF.EmitAutoVarInit(Emission);
1012                                return true;
1013                              });
1014     EmitAutoVarCleanups(Emission);
1015     Address BaseAddr = RedCG.adjustPrivateAddress(
1016         *this, Count, Emission.getAllocatedAddress());
1017     bool IsRegistered = PrivateScope.addPrivate(
1018         RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
1019     assert(IsRegistered && "private var already registered as private");
1020     // Silence the warning about unused variable.
1021     (void)IsRegistered;
1022 
1023     auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1024     auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1025     QualType Type = PrivateVD->getType();
1026     bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1027     if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1028       // Store the address of the original variable associated with the LHS
1029       // implicit variable.
1030       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
1031         return RedCG.getSharedLValue(Count).getAddress();
1032       });
1033       PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
1034         return GetAddrOfLocalVar(PrivateVD);
1035       });
1036     } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1037                isa<ArraySubscriptExpr>(IRef)) {
1038       // Store the address of the original variable associated with the LHS
1039       // implicit variable.
1040       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
1041         return RedCG.getSharedLValue(Count).getAddress();
1042       });
1043       PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
1044         return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
1045                                             ConvertTypeForMem(RHSVD->getType()),
1046                                             "rhs.begin");
1047       });
1048     } else {
1049       QualType Type = PrivateVD->getType();
1050       bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1051       Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
1052       // Store the address of the original variable associated with the LHS
1053       // implicit variable.
1054       if (IsArray) {
1055         OriginalAddr = Builder.CreateElementBitCast(
1056             OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1057       }
1058       PrivateScope.addPrivate(
1059           LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
1060       PrivateScope.addPrivate(
1061           RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
1062             return IsArray
1063                        ? Builder.CreateElementBitCast(
1064                              GetAddrOfLocalVar(PrivateVD),
1065                              ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
1066                        : GetAddrOfLocalVar(PrivateVD);
1067           });
1068     }
1069     ++ILHS;
1070     ++IRHS;
1071     ++IPriv;
1072     ++Count;
1073   }
1074 }
1075 
1076 void CodeGenFunction::EmitOMPReductionClauseFinal(
1077     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1078   if (!HaveInsertPoint())
1079     return;
1080   llvm::SmallVector<const Expr *, 8> Privates;
1081   llvm::SmallVector<const Expr *, 8> LHSExprs;
1082   llvm::SmallVector<const Expr *, 8> RHSExprs;
1083   llvm::SmallVector<const Expr *, 8> ReductionOps;
1084   bool HasAtLeastOneReduction = false;
1085   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1086     HasAtLeastOneReduction = true;
1087     Privates.append(C->privates().begin(), C->privates().end());
1088     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1089     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1090     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1091   }
1092   if (HasAtLeastOneReduction) {
1093     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1094                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1095                       D.getDirectiveKind() == OMPD_simd;
1096     bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
1097     // Emit nowait reduction if nowait clause is present or directive is a
1098     // parallel directive (it always has implicit barrier).
1099     CGM.getOpenMPRuntime().emitReduction(
1100         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1101         {WithNowait, SimpleReduction, ReductionKind});
1102   }
1103 }
1104 
1105 static void emitPostUpdateForReductionClause(
1106     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1107     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1108   if (!CGF.HaveInsertPoint())
1109     return;
1110   llvm::BasicBlock *DoneBB = nullptr;
1111   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1112     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1113       if (!DoneBB) {
1114         if (auto *Cond = CondGen(CGF)) {
1115           // If the first post-update expression is found, emit conditional
1116           // block if it was requested.
1117           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1118           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1119           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1120           CGF.EmitBlock(ThenBB);
1121         }
1122       }
1123       CGF.EmitIgnoredExpr(PostUpdate);
1124     }
1125   }
1126   if (DoneBB)
1127     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1128 }
1129 
1130 namespace {
1131 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1132 /// parallel function. This is necessary for combined constructs such as
1133 /// 'distribute parallel for'
1134 typedef llvm::function_ref<void(CodeGenFunction &,
1135                                 const OMPExecutableDirective &,
1136                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1137     CodeGenBoundParametersTy;
1138 } // anonymous namespace
1139 
1140 static void emitCommonOMPParallelDirective(
1141     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1142     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1143     const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1144   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1145   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1146       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1147   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1148     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1149     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1150                                          /*IgnoreResultAssign*/ true);
1151     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1152         CGF, NumThreads, NumThreadsClause->getLocStart());
1153   }
1154   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1155     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1156     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1157         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
1158   }
1159   const Expr *IfCond = nullptr;
1160   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1161     if (C->getNameModifier() == OMPD_unknown ||
1162         C->getNameModifier() == OMPD_parallel) {
1163       IfCond = C->getCondition();
1164       break;
1165     }
1166   }
1167 
1168   OMPParallelScope Scope(CGF, S);
1169   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1170   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1171   // lower and upper bounds with the pragma 'for' chunking mechanism.
1172   // The following lambda takes care of appending the lower and upper bound
1173   // parameters when necessary
1174   CodeGenBoundParameters(CGF, S, CapturedVars);
1175   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1176   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1177                                               CapturedVars, IfCond);
1178 }
1179 
1180 static void emitEmptyBoundParameters(CodeGenFunction &,
1181                                      const OMPExecutableDirective &,
1182                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
1183 
1184 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1185   // Emit parallel region as a standalone region.
1186   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1187     OMPPrivateScope PrivateScope(CGF);
1188     bool Copyins = CGF.EmitOMPCopyinClause(S);
1189     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1190     if (Copyins) {
1191       // Emit implicit barrier to synchronize threads and avoid data races on
1192       // propagation master's thread values of threadprivate variables to local
1193       // instances of that variables of all other implicit threads.
1194       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1195           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1196           /*ForceSimpleCall=*/true);
1197     }
1198     CGF.EmitOMPPrivateClause(S, PrivateScope);
1199     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1200     (void)PrivateScope.Privatize();
1201     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1202     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1203   };
1204   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1205                                  emitEmptyBoundParameters);
1206   emitPostUpdateForReductionClause(
1207       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1208 }
1209 
1210 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1211                                       JumpDest LoopExit) {
1212   RunCleanupsScope BodyScope(*this);
1213   // Update counters values on current iteration.
1214   for (auto I : D.updates()) {
1215     EmitIgnoredExpr(I);
1216   }
1217   // Update the linear variables.
1218   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1219     for (auto *U : C->updates())
1220       EmitIgnoredExpr(U);
1221   }
1222 
1223   // On a continue in the body, jump to the end.
1224   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1225   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1226   // Emit loop body.
1227   EmitStmt(D.getBody());
1228   // The end (updates/cleanups).
1229   EmitBlock(Continue.getBlock());
1230   BreakContinueStack.pop_back();
1231 }
1232 
1233 void CodeGenFunction::EmitOMPInnerLoop(
1234     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
1235     const Expr *IncExpr,
1236     const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
1237     const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
1238   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1239 
1240   // Start the loop with a block that tests the condition.
1241   auto CondBlock = createBasicBlock("omp.inner.for.cond");
1242   EmitBlock(CondBlock);
1243   const SourceRange &R = S.getSourceRange();
1244   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1245                  SourceLocToDebugLoc(R.getEnd()));
1246 
1247   // If there are any cleanups between here and the loop-exit scope,
1248   // create a block to stage a loop exit along.
1249   auto ExitBlock = LoopExit.getBlock();
1250   if (RequiresCleanup)
1251     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1252 
1253   auto LoopBody = createBasicBlock("omp.inner.for.body");
1254 
1255   // Emit condition.
1256   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1257   if (ExitBlock != LoopExit.getBlock()) {
1258     EmitBlock(ExitBlock);
1259     EmitBranchThroughCleanup(LoopExit);
1260   }
1261 
1262   EmitBlock(LoopBody);
1263   incrementProfileCounter(&S);
1264 
1265   // Create a block for the increment.
1266   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1267   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1268 
1269   BodyGen(*this);
1270 
1271   // Emit "IV = IV + 1" and a back-edge to the condition block.
1272   EmitBlock(Continue.getBlock());
1273   EmitIgnoredExpr(IncExpr);
1274   PostIncGen(*this);
1275   BreakContinueStack.pop_back();
1276   EmitBranch(CondBlock);
1277   LoopStack.pop();
1278   // Emit the fall-through block.
1279   EmitBlock(LoopExit.getBlock());
1280 }
1281 
1282 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1283   if (!HaveInsertPoint())
1284     return false;
1285   // Emit inits for the linear variables.
1286   bool HasLinears = false;
1287   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1288     for (auto *Init : C->inits()) {
1289       HasLinears = true;
1290       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1291       if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1292         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1293         auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1294         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1295                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1296                         VD->getInit()->getType(), VK_LValue,
1297                         VD->getInit()->getExprLoc());
1298         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1299                                                 VD->getType()),
1300                        /*capturedByInit=*/false);
1301         EmitAutoVarCleanups(Emission);
1302       } else
1303         EmitVarDecl(*VD);
1304     }
1305     // Emit the linear steps for the linear clauses.
1306     // If a step is not constant, it is pre-calculated before the loop.
1307     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1308       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1309         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1310         // Emit calculation of the linear step.
1311         EmitIgnoredExpr(CS);
1312       }
1313   }
1314   return HasLinears;
1315 }
1316 
1317 void CodeGenFunction::EmitOMPLinearClauseFinal(
1318     const OMPLoopDirective &D,
1319     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1320   if (!HaveInsertPoint())
1321     return;
1322   llvm::BasicBlock *DoneBB = nullptr;
1323   // Emit the final values of the linear variables.
1324   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1325     auto IC = C->varlist_begin();
1326     for (auto *F : C->finals()) {
1327       if (!DoneBB) {
1328         if (auto *Cond = CondGen(*this)) {
1329           // If the first post-update expression is found, emit conditional
1330           // block if it was requested.
1331           auto *ThenBB = createBasicBlock(".omp.linear.pu");
1332           DoneBB = createBasicBlock(".omp.linear.pu.done");
1333           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1334           EmitBlock(ThenBB);
1335         }
1336       }
1337       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1338       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1339                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1340                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1341       Address OrigAddr = EmitLValue(&DRE).getAddress();
1342       CodeGenFunction::OMPPrivateScope VarScope(*this);
1343       VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
1344       (void)VarScope.Privatize();
1345       EmitIgnoredExpr(F);
1346       ++IC;
1347     }
1348     if (auto *PostUpdate = C->getPostUpdateExpr())
1349       EmitIgnoredExpr(PostUpdate);
1350   }
1351   if (DoneBB)
1352     EmitBlock(DoneBB, /*IsFinished=*/true);
1353 }
1354 
1355 static void emitAlignedClause(CodeGenFunction &CGF,
1356                               const OMPExecutableDirective &D) {
1357   if (!CGF.HaveInsertPoint())
1358     return;
1359   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1360     unsigned ClauseAlignment = 0;
1361     if (auto AlignmentExpr = Clause->getAlignment()) {
1362       auto AlignmentCI =
1363           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1364       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1365     }
1366     for (auto E : Clause->varlists()) {
1367       unsigned Alignment = ClauseAlignment;
1368       if (Alignment == 0) {
1369         // OpenMP [2.8.1, Description]
1370         // If no optional parameter is specified, implementation-defined default
1371         // alignments for SIMD instructions on the target platforms are assumed.
1372         Alignment =
1373             CGF.getContext()
1374                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1375                     E->getType()->getPointeeType()))
1376                 .getQuantity();
1377       }
1378       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1379              "alignment is not power of 2");
1380       if (Alignment != 0) {
1381         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1382         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1383       }
1384     }
1385   }
1386 }
1387 
1388 void CodeGenFunction::EmitOMPPrivateLoopCounters(
1389     const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
1390   if (!HaveInsertPoint())
1391     return;
1392   auto I = S.private_counters().begin();
1393   for (auto *E : S.counters()) {
1394     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1395     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1396     (void)LoopScope.addPrivate(VD, [&]() -> Address {
1397       // Emit var without initialization.
1398       if (!LocalDeclMap.count(PrivateVD)) {
1399         auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
1400         EmitAutoVarCleanups(VarEmission);
1401       }
1402       DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1403                       /*RefersToEnclosingVariableOrCapture=*/false,
1404                       (*I)->getType(), VK_LValue, (*I)->getExprLoc());
1405       return EmitLValue(&DRE).getAddress();
1406     });
1407     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
1408         VD->hasGlobalStorage()) {
1409       (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1410         DeclRefExpr DRE(const_cast<VarDecl *>(VD),
1411                         LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
1412                         E->getType(), VK_LValue, E->getExprLoc());
1413         return EmitLValue(&DRE).getAddress();
1414       });
1415     }
1416     ++I;
1417   }
1418 }
1419 
1420 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1421                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1422                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1423   if (!CGF.HaveInsertPoint())
1424     return;
1425   {
1426     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1427     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1428     (void)PreCondScope.Privatize();
1429     // Get initial values of real counters.
1430     for (auto I : S.inits()) {
1431       CGF.EmitIgnoredExpr(I);
1432     }
1433   }
1434   // Check that loop is executed at least one time.
1435   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1436 }
1437 
1438 void CodeGenFunction::EmitOMPLinearClause(
1439     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1440   if (!HaveInsertPoint())
1441     return;
1442   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1443   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1444     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1445     for (auto *C : LoopDirective->counters()) {
1446       SIMDLCVs.insert(
1447           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1448     }
1449   }
1450   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1451     auto CurPrivate = C->privates().begin();
1452     for (auto *E : C->varlists()) {
1453       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1454       auto *PrivateVD =
1455           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1456       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1457         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1458           // Emit private VarDecl with copy init.
1459           EmitVarDecl(*PrivateVD);
1460           return GetAddrOfLocalVar(PrivateVD);
1461         });
1462         assert(IsRegistered && "linear var already registered as private");
1463         // Silence the warning about unused variable.
1464         (void)IsRegistered;
1465       } else
1466         EmitVarDecl(*PrivateVD);
1467       ++CurPrivate;
1468     }
1469   }
1470 }
1471 
1472 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1473                                      const OMPExecutableDirective &D,
1474                                      bool IsMonotonic) {
1475   if (!CGF.HaveInsertPoint())
1476     return;
1477   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1478     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1479                                  /*ignoreResult=*/true);
1480     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1481     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1482     // In presence of finite 'safelen', it may be unsafe to mark all
1483     // the memory instructions parallel, because loop-carried
1484     // dependences of 'safelen' iterations are possible.
1485     if (!IsMonotonic)
1486       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1487   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1488     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1489                                  /*ignoreResult=*/true);
1490     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1491     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1492     // In presence of finite 'safelen', it may be unsafe to mark all
1493     // the memory instructions parallel, because loop-carried
1494     // dependences of 'safelen' iterations are possible.
1495     CGF.LoopStack.setParallel(false);
1496   }
1497 }
1498 
1499 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1500                                       bool IsMonotonic) {
1501   // Walk clauses and process safelen/lastprivate.
1502   LoopStack.setParallel(!IsMonotonic);
1503   LoopStack.setVectorizeEnable(true);
1504   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1505 }
1506 
1507 void CodeGenFunction::EmitOMPSimdFinal(
1508     const OMPLoopDirective &D,
1509     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1510   if (!HaveInsertPoint())
1511     return;
1512   llvm::BasicBlock *DoneBB = nullptr;
1513   auto IC = D.counters().begin();
1514   auto IPC = D.private_counters().begin();
1515   for (auto F : D.finals()) {
1516     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1517     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1518     auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1519     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1520         OrigVD->hasGlobalStorage() || CED) {
1521       if (!DoneBB) {
1522         if (auto *Cond = CondGen(*this)) {
1523           // If the first post-update expression is found, emit conditional
1524           // block if it was requested.
1525           auto *ThenBB = createBasicBlock(".omp.final.then");
1526           DoneBB = createBasicBlock(".omp.final.done");
1527           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1528           EmitBlock(ThenBB);
1529         }
1530       }
1531       Address OrigAddr = Address::invalid();
1532       if (CED)
1533         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1534       else {
1535         DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1536                         /*RefersToEnclosingVariableOrCapture=*/false,
1537                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1538         OrigAddr = EmitLValue(&DRE).getAddress();
1539       }
1540       OMPPrivateScope VarScope(*this);
1541       VarScope.addPrivate(OrigVD,
1542                           [OrigAddr]() -> Address { return OrigAddr; });
1543       (void)VarScope.Privatize();
1544       EmitIgnoredExpr(F);
1545     }
1546     ++IC;
1547     ++IPC;
1548   }
1549   if (DoneBB)
1550     EmitBlock(DoneBB, /*IsFinished=*/true);
1551 }
1552 
1553 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
1554                                          const OMPLoopDirective &S,
1555                                          CodeGenFunction::JumpDest LoopExit) {
1556   CGF.EmitOMPLoopBody(S, LoopExit);
1557   CGF.EmitStopPoint(&S);
1558 }
1559 
1560 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
1561                               PrePostActionTy &Action) {
1562   Action.Enter(CGF);
1563   assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
1564          "Expected simd directive");
1565   OMPLoopScope PreInitScope(CGF, S);
1566   // if (PreCond) {
1567   //   for (IV in 0..LastIteration) BODY;
1568   //   <Final counter/linear vars updates>;
1569   // }
1570   //
1571 
1572   // Emit: if (PreCond) - begin.
1573   // If the condition constant folds and can be elided, avoid emitting the
1574   // whole loop.
1575   bool CondConstant;
1576   llvm::BasicBlock *ContBlock = nullptr;
1577   if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1578     if (!CondConstant)
1579       return;
1580   } else {
1581     auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
1582     ContBlock = CGF.createBasicBlock("simd.if.end");
1583     emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
1584                 CGF.getProfileCount(&S));
1585     CGF.EmitBlock(ThenBlock);
1586     CGF.incrementProfileCounter(&S);
1587   }
1588 
1589   // Emit the loop iteration variable.
1590   const Expr *IVExpr = S.getIterationVariable();
1591   const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
1592   CGF.EmitVarDecl(*IVDecl);
1593   CGF.EmitIgnoredExpr(S.getInit());
1594 
1595   // Emit the iterations count variable.
1596   // If it is not a variable, Sema decided to calculate iterations count on
1597   // each iteration (e.g., it is foldable into a constant).
1598   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1599     CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1600     // Emit calculation of the iterations count.
1601     CGF.EmitIgnoredExpr(S.getCalcLastIteration());
1602   }
1603 
1604   CGF.EmitOMPSimdInit(S);
1605 
1606   emitAlignedClause(CGF, S);
1607   (void)CGF.EmitOMPLinearClauseInit(S);
1608   {
1609     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
1610     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
1611     CGF.EmitOMPLinearClause(S, LoopScope);
1612     CGF.EmitOMPPrivateClause(S, LoopScope);
1613     CGF.EmitOMPReductionClauseInit(S, LoopScope);
1614     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
1615     (void)LoopScope.Privatize();
1616     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1617                          S.getInc(),
1618                          [&S](CodeGenFunction &CGF) {
1619                            CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
1620                            CGF.EmitStopPoint(&S);
1621                          },
1622                          [](CodeGenFunction &) {});
1623     CGF.EmitOMPSimdFinal(
1624         S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1625     // Emit final copy of the lastprivate variables at the end of loops.
1626     if (HasLastprivateClause)
1627       CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
1628     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
1629     emitPostUpdateForReductionClause(
1630         CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1631   }
1632   CGF.EmitOMPLinearClauseFinal(
1633       S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1634   // Emit: if (PreCond) - end.
1635   if (ContBlock) {
1636     CGF.EmitBranch(ContBlock);
1637     CGF.EmitBlock(ContBlock, true);
1638   }
1639 }
1640 
1641 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1642   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1643     emitOMPSimdRegion(CGF, S, Action);
1644   };
1645   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1646   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1647 }
1648 
1649 void CodeGenFunction::EmitOMPOuterLoop(
1650     bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
1651     CodeGenFunction::OMPPrivateScope &LoopScope,
1652     const CodeGenFunction::OMPLoopArguments &LoopArgs,
1653     const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
1654     const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
1655   auto &RT = CGM.getOpenMPRuntime();
1656 
1657   const Expr *IVExpr = S.getIterationVariable();
1658   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1659   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1660 
1661   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
1662 
1663   // Start the loop with a block that tests the condition.
1664   auto CondBlock = createBasicBlock("omp.dispatch.cond");
1665   EmitBlock(CondBlock);
1666   const SourceRange &R = S.getSourceRange();
1667   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1668                  SourceLocToDebugLoc(R.getEnd()));
1669 
1670   llvm::Value *BoolCondVal = nullptr;
1671   if (!DynamicOrOrdered) {
1672     // UB = min(UB, GlobalUB) or
1673     // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
1674     // 'distribute parallel for')
1675     EmitIgnoredExpr(LoopArgs.EUB);
1676     // IV = LB
1677     EmitIgnoredExpr(LoopArgs.Init);
1678     // IV < UB
1679     BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
1680   } else {
1681     BoolCondVal =
1682         RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
1683                        LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
1684   }
1685 
1686   // If there are any cleanups between here and the loop-exit scope,
1687   // create a block to stage a loop exit along.
1688   auto ExitBlock = LoopExit.getBlock();
1689   if (LoopScope.requiresCleanups())
1690     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
1691 
1692   auto LoopBody = createBasicBlock("omp.dispatch.body");
1693   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
1694   if (ExitBlock != LoopExit.getBlock()) {
1695     EmitBlock(ExitBlock);
1696     EmitBranchThroughCleanup(LoopExit);
1697   }
1698   EmitBlock(LoopBody);
1699 
1700   // Emit "IV = LB" (in case of static schedule, we have already calculated new
1701   // LB for loop condition and emitted it above).
1702   if (DynamicOrOrdered)
1703     EmitIgnoredExpr(LoopArgs.Init);
1704 
1705   // Create a block for the increment.
1706   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
1707   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1708 
1709   // Generate !llvm.loop.parallel metadata for loads and stores for loops
1710   // with dynamic/guided scheduling and without ordered clause.
1711   if (!isOpenMPSimdDirective(S.getDirectiveKind()))
1712     LoopStack.setParallel(!IsMonotonic);
1713   else
1714     EmitOMPSimdInit(S, IsMonotonic);
1715 
1716   SourceLocation Loc = S.getLocStart();
1717 
1718   // when 'distribute' is not combined with a 'for':
1719   // while (idx <= UB) { BODY; ++idx; }
1720   // when 'distribute' is combined with a 'for'
1721   // (e.g. 'distribute parallel for')
1722   // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
1723   EmitOMPInnerLoop(
1724       S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
1725       [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
1726         CodeGenLoop(CGF, S, LoopExit);
1727       },
1728       [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
1729         CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
1730       });
1731 
1732   EmitBlock(Continue.getBlock());
1733   BreakContinueStack.pop_back();
1734   if (!DynamicOrOrdered) {
1735     // Emit "LB = LB + Stride", "UB = UB + Stride".
1736     EmitIgnoredExpr(LoopArgs.NextLB);
1737     EmitIgnoredExpr(LoopArgs.NextUB);
1738   }
1739 
1740   EmitBranch(CondBlock);
1741   LoopStack.pop();
1742   // Emit the fall-through block.
1743   EmitBlock(LoopExit.getBlock());
1744 
1745   // Tell the runtime we are done.
1746   auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
1747     if (!DynamicOrOrdered)
1748       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
1749                                                      S.getDirectiveKind());
1750   };
1751   OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
1752 }
1753 
1754 void CodeGenFunction::EmitOMPForOuterLoop(
1755     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1756     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1757     const OMPLoopArguments &LoopArgs,
1758     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1759   auto &RT = CGM.getOpenMPRuntime();
1760 
1761   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1762   const bool DynamicOrOrdered =
1763       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1764 
1765   assert((Ordered ||
1766           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1767                                  LoopArgs.Chunk != nullptr)) &&
1768          "static non-chunked schedule does not need outer loop");
1769 
1770   // Emit outer loop.
1771   //
1772   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1773   // When schedule(dynamic,chunk_size) is specified, the iterations are
1774   // distributed to threads in the team in chunks as the threads request them.
1775   // Each thread executes a chunk of iterations, then requests another chunk,
1776   // until no chunks remain to be distributed. Each chunk contains chunk_size
1777   // iterations, except for the last chunk to be distributed, which may have
1778   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1779   //
1780   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1781   // to threads in the team in chunks as the executing threads request them.
1782   // Each thread executes a chunk of iterations, then requests another chunk,
1783   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1784   // each chunk is proportional to the number of unassigned iterations divided
1785   // by the number of threads in the team, decreasing to 1. For a chunk_size
1786   // with value k (greater than 1), the size of each chunk is determined in the
1787   // same way, with the restriction that the chunks do not contain fewer than k
1788   // iterations (except for the last chunk to be assigned, which may have fewer
1789   // than k iterations).
1790   //
1791   // When schedule(auto) is specified, the decision regarding scheduling is
1792   // delegated to the compiler and/or runtime system. The programmer gives the
1793   // implementation the freedom to choose any possible mapping of iterations to
1794   // threads in the team.
1795   //
1796   // When schedule(runtime) is specified, the decision regarding scheduling is
1797   // deferred until run time, and the schedule and chunk size are taken from the
1798   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1799   // implementation defined
1800   //
1801   // while(__kmpc_dispatch_next(&LB, &UB)) {
1802   //   idx = LB;
1803   //   while (idx <= UB) { BODY; ++idx;
1804   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1805   //   } // inner loop
1806   // }
1807   //
1808   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1809   // When schedule(static, chunk_size) is specified, iterations are divided into
1810   // chunks of size chunk_size, and the chunks are assigned to the threads in
1811   // the team in a round-robin fashion in the order of the thread number.
1812   //
1813   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1814   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1815   //   LB = LB + ST;
1816   //   UB = UB + ST;
1817   // }
1818   //
1819 
1820   const Expr *IVExpr = S.getIterationVariable();
1821   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1822   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1823 
1824   if (DynamicOrOrdered) {
1825     auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1826     llvm::Value *LBVal = DispatchBounds.first;
1827     llvm::Value *UBVal = DispatchBounds.second;
1828     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
1829                                                              LoopArgs.Chunk};
1830     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1831                            IVSigned, Ordered, DipatchRTInputValues);
1832   } else {
1833     CGOpenMPRuntime::StaticRTInput StaticInit(
1834         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1835         LoopArgs.ST, LoopArgs.Chunk);
1836     RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
1837                          ScheduleKind, StaticInit);
1838   }
1839 
1840   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1841                                     const unsigned IVSize,
1842                                     const bool IVSigned) {
1843     if (Ordered) {
1844       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1845                                                             IVSigned);
1846     }
1847   };
1848 
1849   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1850                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1851   OuterLoopArgs.IncExpr = S.getInc();
1852   OuterLoopArgs.Init = S.getInit();
1853   OuterLoopArgs.Cond = S.getCond();
1854   OuterLoopArgs.NextLB = S.getNextLowerBound();
1855   OuterLoopArgs.NextUB = S.getNextUpperBound();
1856   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1857                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1858 }
1859 
1860 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
1861                              const unsigned IVSize, const bool IVSigned) {}
1862 
1863 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1864     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1865     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1866     const CodeGenLoopTy &CodeGenLoopContent) {
1867 
1868   auto &RT = CGM.getOpenMPRuntime();
1869 
1870   // Emit outer loop.
1871   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1872   // dynamic
1873   //
1874 
1875   const Expr *IVExpr = S.getIterationVariable();
1876   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1877   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1878 
1879   CGOpenMPRuntime::StaticRTInput StaticInit(
1880       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
1881       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
1882   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
1883 
1884   // for combined 'distribute' and 'for' the increment expression of distribute
1885   // is store in DistInc. For 'distribute' alone, it is in Inc.
1886   Expr *IncExpr;
1887   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1888     IncExpr = S.getDistInc();
1889   else
1890     IncExpr = S.getInc();
1891 
1892   // this routine is shared by 'omp distribute parallel for' and
1893   // 'omp distribute': select the right EUB expression depending on the
1894   // directive
1895   OMPLoopArguments OuterLoopArgs;
1896   OuterLoopArgs.LB = LoopArgs.LB;
1897   OuterLoopArgs.UB = LoopArgs.UB;
1898   OuterLoopArgs.ST = LoopArgs.ST;
1899   OuterLoopArgs.IL = LoopArgs.IL;
1900   OuterLoopArgs.Chunk = LoopArgs.Chunk;
1901   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1902                           ? S.getCombinedEnsureUpperBound()
1903                           : S.getEnsureUpperBound();
1904   OuterLoopArgs.IncExpr = IncExpr;
1905   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1906                            ? S.getCombinedInit()
1907                            : S.getInit();
1908   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1909                            ? S.getCombinedCond()
1910                            : S.getCond();
1911   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1912                              ? S.getCombinedNextLowerBound()
1913                              : S.getNextLowerBound();
1914   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1915                              ? S.getCombinedNextUpperBound()
1916                              : S.getNextUpperBound();
1917 
1918   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
1919                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
1920                    emitEmptyOrdered);
1921 }
1922 
1923 /// Emit a helper variable and return corresponding lvalue.
1924 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1925                                const DeclRefExpr *Helper) {
1926   auto VDecl = cast<VarDecl>(Helper->getDecl());
1927   CGF.EmitVarDecl(*VDecl);
1928   return CGF.EmitLValue(Helper);
1929 }
1930 
1931 static std::pair<LValue, LValue>
1932 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
1933                                      const OMPExecutableDirective &S) {
1934   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1935   LValue LB =
1936       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
1937   LValue UB =
1938       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
1939 
1940   // When composing 'distribute' with 'for' (e.g. as in 'distribute
1941   // parallel for') we need to use the 'distribute'
1942   // chunk lower and upper bounds rather than the whole loop iteration
1943   // space. These are parameters to the outlined function for 'parallel'
1944   // and we copy the bounds of the previous schedule into the
1945   // the current ones.
1946   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
1947   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
1948   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
1949   PrevLBVal = CGF.EmitScalarConversion(
1950       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
1951       LS.getIterationVariable()->getType(), SourceLocation());
1952   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
1953   PrevUBVal = CGF.EmitScalarConversion(
1954       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
1955       LS.getIterationVariable()->getType(), SourceLocation());
1956 
1957   CGF.EmitStoreOfScalar(PrevLBVal, LB);
1958   CGF.EmitStoreOfScalar(PrevUBVal, UB);
1959 
1960   return {LB, UB};
1961 }
1962 
1963 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
1964 /// we need to use the LB and UB expressions generated by the worksharing
1965 /// code generation support, whereas in non combined situations we would
1966 /// just emit 0 and the LastIteration expression
1967 /// This function is necessary due to the difference of the LB and UB
1968 /// types for the RT emission routines for 'for_static_init' and
1969 /// 'for_dispatch_init'
1970 static std::pair<llvm::Value *, llvm::Value *>
1971 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
1972                                         const OMPExecutableDirective &S,
1973                                         Address LB, Address UB) {
1974   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1975   const Expr *IVExpr = LS.getIterationVariable();
1976   // when implementing a dynamic schedule for a 'for' combined with a
1977   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
1978   // is not normalized as each team only executes its own assigned
1979   // distribute chunk
1980   QualType IteratorTy = IVExpr->getType();
1981   llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
1982                                             SourceLocation());
1983   llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
1984                                             SourceLocation());
1985   return {LBVal, UBVal};
1986 }
1987 
1988 static void emitDistributeParallelForDistributeInnerBoundParams(
1989     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1990     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
1991   const auto &Dir = cast<OMPLoopDirective>(S);
1992   LValue LB =
1993       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
1994   auto LBCast = CGF.Builder.CreateIntCast(
1995       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1996   CapturedVars.push_back(LBCast);
1997   LValue UB =
1998       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
1999 
2000   auto UBCast = CGF.Builder.CreateIntCast(
2001       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2002   CapturedVars.push_back(UBCast);
2003 }
2004 
2005 static void
2006 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2007                                  const OMPLoopDirective &S,
2008                                  CodeGenFunction::JumpDest LoopExit) {
2009   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2010                                          PrePostActionTy &) {
2011     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2012                                emitDistributeParallelForInnerBounds,
2013                                emitDistributeParallelForDispatchBounds);
2014   };
2015 
2016   emitCommonOMPParallelDirective(
2017       CGF, S, OMPD_for, CGInlinedWorksharingLoop,
2018       emitDistributeParallelForDistributeInnerBoundParams);
2019 }
2020 
2021 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2022     const OMPDistributeParallelForDirective &S) {
2023   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2024     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2025                               S.getDistInc());
2026   };
2027   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2028   OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for,
2029                                   /*HasCancel=*/false);
2030   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
2031                                               /*HasCancel=*/false);
2032 }
2033 
2034 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2035     const OMPDistributeParallelForSimdDirective &S) {
2036   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2037   CGM.getOpenMPRuntime().emitInlinedDirective(
2038       *this, OMPD_distribute_parallel_for_simd,
2039       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2040         OMPLoopScope PreInitScope(CGF, S);
2041         CGF.EmitStmt(
2042             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2043       });
2044 }
2045 
2046 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2047     const OMPDistributeSimdDirective &S) {
2048   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2049   CGM.getOpenMPRuntime().emitInlinedDirective(
2050       *this, OMPD_distribute_simd,
2051       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2052         OMPLoopScope PreInitScope(CGF, S);
2053         CGF.EmitStmt(
2054             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2055       });
2056 }
2057 
2058 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
2059     CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
2060   // Emit SPMD target parallel for region as a standalone region.
2061   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2062     emitOMPSimdRegion(CGF, S, Action);
2063   };
2064   llvm::Function *Fn;
2065   llvm::Constant *Addr;
2066   // Emit target region as a standalone region.
2067   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
2068       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
2069   assert(Fn && Addr && "Target device function emission failed.");
2070 }
2071 
2072 void CodeGenFunction::EmitOMPTargetSimdDirective(
2073     const OMPTargetSimdDirective &S) {
2074   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2075     emitOMPSimdRegion(CGF, S, Action);
2076   };
2077   emitCommonOMPTargetDirective(*this, S, CodeGen);
2078 }
2079 
2080 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
2081     const OMPTeamsDistributeSimdDirective &S) {
2082   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2083   CGM.getOpenMPRuntime().emitInlinedDirective(
2084       *this, OMPD_teams_distribute_simd,
2085       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2086         OMPLoopScope PreInitScope(CGF, S);
2087         CGF.EmitStmt(
2088             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2089       });
2090 }
2091 
2092 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
2093     const OMPTeamsDistributeParallelForSimdDirective &S) {
2094   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2095   CGM.getOpenMPRuntime().emitInlinedDirective(
2096       *this, OMPD_teams_distribute_parallel_for_simd,
2097       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2098         OMPLoopScope PreInitScope(CGF, S);
2099         CGF.EmitStmt(
2100             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2101       });
2102 }
2103 
2104 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
2105     const OMPTargetTeamsDistributeDirective &S) {
2106   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2107   CGM.getOpenMPRuntime().emitInlinedDirective(
2108       *this, OMPD_target_teams_distribute,
2109       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2110         CGF.EmitStmt(
2111             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2112       });
2113 }
2114 
2115 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
2116     const OMPTargetTeamsDistributeParallelForDirective &S) {
2117   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2118   CGM.getOpenMPRuntime().emitInlinedDirective(
2119       *this, OMPD_target_teams_distribute_parallel_for,
2120       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2121         CGF.EmitStmt(
2122             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2123       });
2124 }
2125 
2126 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
2127     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
2128   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2129   CGM.getOpenMPRuntime().emitInlinedDirective(
2130       *this, OMPD_target_teams_distribute_parallel_for_simd,
2131       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2132         CGF.EmitStmt(
2133             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2134       });
2135 }
2136 
2137 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
2138     const OMPTargetTeamsDistributeSimdDirective &S) {
2139   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2140   CGM.getOpenMPRuntime().emitInlinedDirective(
2141       *this, OMPD_target_teams_distribute_simd,
2142       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2143         CGF.EmitStmt(
2144             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2145       });
2146 }
2147 
2148 namespace {
2149   struct ScheduleKindModifiersTy {
2150     OpenMPScheduleClauseKind Kind;
2151     OpenMPScheduleClauseModifier M1;
2152     OpenMPScheduleClauseModifier M2;
2153     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2154                             OpenMPScheduleClauseModifier M1,
2155                             OpenMPScheduleClauseModifier M2)
2156         : Kind(Kind), M1(M1), M2(M2) {}
2157   };
2158 } // namespace
2159 
2160 bool CodeGenFunction::EmitOMPWorksharingLoop(
2161     const OMPLoopDirective &S, Expr *EUB,
2162     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2163     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2164   // Emit the loop iteration variable.
2165   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2166   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
2167   EmitVarDecl(*IVDecl);
2168 
2169   // Emit the iterations count variable.
2170   // If it is not a variable, Sema decided to calculate iterations count on each
2171   // iteration (e.g., it is foldable into a constant).
2172   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2173     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2174     // Emit calculation of the iterations count.
2175     EmitIgnoredExpr(S.getCalcLastIteration());
2176   }
2177 
2178   auto &RT = CGM.getOpenMPRuntime();
2179 
2180   bool HasLastprivateClause;
2181   // Check pre-condition.
2182   {
2183     OMPLoopScope PreInitScope(*this, S);
2184     // Skip the entire loop if we don't meet the precondition.
2185     // If the condition constant folds and can be elided, avoid emitting the
2186     // whole loop.
2187     bool CondConstant;
2188     llvm::BasicBlock *ContBlock = nullptr;
2189     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2190       if (!CondConstant)
2191         return false;
2192     } else {
2193       auto *ThenBlock = createBasicBlock("omp.precond.then");
2194       ContBlock = createBasicBlock("omp.precond.end");
2195       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2196                   getProfileCount(&S));
2197       EmitBlock(ThenBlock);
2198       incrementProfileCounter(&S);
2199     }
2200 
2201     bool Ordered = false;
2202     if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2203       if (OrderedClause->getNumForLoops())
2204         RT.emitDoacrossInit(*this, S);
2205       else
2206         Ordered = true;
2207     }
2208 
2209     llvm::DenseSet<const Expr *> EmittedFinals;
2210     emitAlignedClause(*this, S);
2211     bool HasLinears = EmitOMPLinearClauseInit(S);
2212     // Emit helper vars inits.
2213 
2214     std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
2215     LValue LB = Bounds.first;
2216     LValue UB = Bounds.second;
2217     LValue ST =
2218         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2219     LValue IL =
2220         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2221 
2222     // Emit 'then' code.
2223     {
2224       OMPPrivateScope LoopScope(*this);
2225       if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
2226         // Emit implicit barrier to synchronize threads and avoid data races on
2227         // initialization of firstprivate variables and post-update of
2228         // lastprivate variables.
2229         CGM.getOpenMPRuntime().emitBarrierCall(
2230             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2231             /*ForceSimpleCall=*/true);
2232       }
2233       EmitOMPPrivateClause(S, LoopScope);
2234       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2235       EmitOMPReductionClauseInit(S, LoopScope);
2236       EmitOMPPrivateLoopCounters(S, LoopScope);
2237       EmitOMPLinearClause(S, LoopScope);
2238       (void)LoopScope.Privatize();
2239 
2240       // Detect the loop schedule kind and chunk.
2241       llvm::Value *Chunk = nullptr;
2242       OpenMPScheduleTy ScheduleKind;
2243       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
2244         ScheduleKind.Schedule = C->getScheduleKind();
2245         ScheduleKind.M1 = C->getFirstScheduleModifier();
2246         ScheduleKind.M2 = C->getSecondScheduleModifier();
2247         if (const auto *Ch = C->getChunkSize()) {
2248           Chunk = EmitScalarExpr(Ch);
2249           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
2250                                        S.getIterationVariable()->getType(),
2251                                        S.getLocStart());
2252         }
2253       }
2254       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2255       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2256       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2257       // If the static schedule kind is specified or if the ordered clause is
2258       // specified, and if no monotonic modifier is specified, the effect will
2259       // be as if the monotonic modifier was specified.
2260       if (RT.isStaticNonchunked(ScheduleKind.Schedule,
2261                                 /* Chunked */ Chunk != nullptr) &&
2262           !Ordered) {
2263         if (isOpenMPSimdDirective(S.getDirectiveKind()))
2264           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
2265         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2266         // When no chunk_size is specified, the iteration space is divided into
2267         // chunks that are approximately equal in size, and at most one chunk is
2268         // distributed to each thread. Note that the size of the chunks is
2269         // unspecified in this case.
2270         CGOpenMPRuntime::StaticRTInput StaticInit(
2271             IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
2272             UB.getAddress(), ST.getAddress());
2273         RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
2274                              ScheduleKind, StaticInit);
2275         auto LoopExit =
2276             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2277         // UB = min(UB, GlobalUB);
2278         EmitIgnoredExpr(S.getEnsureUpperBound());
2279         // IV = LB;
2280         EmitIgnoredExpr(S.getInit());
2281         // while (idx <= UB) { BODY; ++idx; }
2282         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
2283                          S.getInc(),
2284                          [&S, LoopExit](CodeGenFunction &CGF) {
2285                            CGF.EmitOMPLoopBody(S, LoopExit);
2286                            CGF.EmitStopPoint(&S);
2287                          },
2288                          [](CodeGenFunction &) {});
2289         EmitBlock(LoopExit.getBlock());
2290         // Tell the runtime we are done.
2291         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2292           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
2293                                                          S.getDirectiveKind());
2294         };
2295         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2296       } else {
2297         const bool IsMonotonic =
2298             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2299             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2300             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2301             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2302         // Emit the outer loop, which requests its work chunk [LB..UB] from
2303         // runtime and runs the inner loop to process it.
2304         const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
2305                                              ST.getAddress(), IL.getAddress(),
2306                                              Chunk, EUB);
2307         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2308                             LoopArguments, CGDispatchBounds);
2309       }
2310       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2311         EmitOMPSimdFinal(S,
2312                          [&](CodeGenFunction &CGF) -> llvm::Value * {
2313                            return CGF.Builder.CreateIsNotNull(
2314                                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2315                          });
2316       }
2317       EmitOMPReductionClauseFinal(
2318           S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
2319                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
2320                  : /*Parallel only*/ OMPD_parallel);
2321       // Emit post-update of the reduction variables if IsLastIter != 0.
2322       emitPostUpdateForReductionClause(
2323           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2324             return CGF.Builder.CreateIsNotNull(
2325                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2326           });
2327       // Emit final copy of the lastprivate variables if IsLastIter != 0.
2328       if (HasLastprivateClause)
2329         EmitOMPLastprivateClauseFinal(
2330             S, isOpenMPSimdDirective(S.getDirectiveKind()),
2331             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
2332     }
2333     EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2334       return CGF.Builder.CreateIsNotNull(
2335           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2336     });
2337     // We're now done with the loop, so jump to the continuation block.
2338     if (ContBlock) {
2339       EmitBranch(ContBlock);
2340       EmitBlock(ContBlock, true);
2341     }
2342   }
2343   return HasLastprivateClause;
2344 }
2345 
2346 /// The following two functions generate expressions for the loop lower
2347 /// and upper bounds in case of static and dynamic (dispatch) schedule
2348 /// of the associated 'for' or 'distribute' loop.
2349 static std::pair<LValue, LValue>
2350 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2351   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2352   LValue LB =
2353       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2354   LValue UB =
2355       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2356   return {LB, UB};
2357 }
2358 
2359 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2360 /// consider the lower and upper bound expressions generated by the
2361 /// worksharing loop support, but we use 0 and the iteration space size as
2362 /// constants
2363 static std::pair<llvm::Value *, llvm::Value *>
2364 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2365                           Address LB, Address UB) {
2366   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2367   const Expr *IVExpr = LS.getIterationVariable();
2368   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2369   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2370   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2371   return {LBVal, UBVal};
2372 }
2373 
2374 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2375   bool HasLastprivates = false;
2376   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2377                                           PrePostActionTy &) {
2378     OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2379     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2380                                                  emitForLoopBounds,
2381                                                  emitDispatchForLoopBounds);
2382   };
2383   {
2384     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2385     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2386                                                 S.hasCancel());
2387   }
2388 
2389   // Emit an implicit barrier at the end.
2390   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2391     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2392   }
2393 }
2394 
2395 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2396   bool HasLastprivates = false;
2397   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2398                                           PrePostActionTy &) {
2399     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2400                                                  emitForLoopBounds,
2401                                                  emitDispatchForLoopBounds);
2402   };
2403   {
2404     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2405     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2406   }
2407 
2408   // Emit an implicit barrier at the end.
2409   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2410     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2411   }
2412 }
2413 
2414 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2415                                 const Twine &Name,
2416                                 llvm::Value *Init = nullptr) {
2417   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2418   if (Init)
2419     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2420   return LVal;
2421 }
2422 
2423 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
2424   auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
2425   auto *CS = dyn_cast<CompoundStmt>(Stmt);
2426   bool HasLastprivates = false;
2427   auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
2428                                                     PrePostActionTy &) {
2429     auto &C = CGF.CGM.getContext();
2430     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2431     // Emit helper vars inits.
2432     LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
2433                                   CGF.Builder.getInt32(0));
2434     auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
2435                                       : CGF.Builder.getInt32(0);
2436     LValue UB =
2437         createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
2438     LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
2439                                   CGF.Builder.getInt32(1));
2440     LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
2441                                   CGF.Builder.getInt32(0));
2442     // Loop counter.
2443     LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
2444     OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2445     CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
2446     OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2447     CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
2448     // Generate condition for loop.
2449     BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
2450                         OK_Ordinary, S.getLocStart(), FPOptions());
2451     // Increment for loop counter.
2452     UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
2453                       S.getLocStart());
2454     auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
2455       // Iterate through all sections and emit a switch construct:
2456       // switch (IV) {
2457       //   case 0:
2458       //     <SectionStmt[0]>;
2459       //     break;
2460       // ...
2461       //   case <NumSection> - 1:
2462       //     <SectionStmt[<NumSection> - 1]>;
2463       //     break;
2464       // }
2465       // .omp.sections.exit:
2466       auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
2467       auto *SwitchStmt = CGF.Builder.CreateSwitch(
2468           CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
2469           CS == nullptr ? 1 : CS->size());
2470       if (CS) {
2471         unsigned CaseNumber = 0;
2472         for (auto *SubStmt : CS->children()) {
2473           auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2474           CGF.EmitBlock(CaseBB);
2475           SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
2476           CGF.EmitStmt(SubStmt);
2477           CGF.EmitBranch(ExitBB);
2478           ++CaseNumber;
2479         }
2480       } else {
2481         auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2482         CGF.EmitBlock(CaseBB);
2483         SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
2484         CGF.EmitStmt(Stmt);
2485         CGF.EmitBranch(ExitBB);
2486       }
2487       CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
2488     };
2489 
2490     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2491     if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
2492       // Emit implicit barrier to synchronize threads and avoid data races on
2493       // initialization of firstprivate variables and post-update of lastprivate
2494       // variables.
2495       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
2496           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2497           /*ForceSimpleCall=*/true);
2498     }
2499     CGF.EmitOMPPrivateClause(S, LoopScope);
2500     HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2501     CGF.EmitOMPReductionClauseInit(S, LoopScope);
2502     (void)LoopScope.Privatize();
2503 
2504     // Emit static non-chunked loop.
2505     OpenMPScheduleTy ScheduleKind;
2506     ScheduleKind.Schedule = OMPC_SCHEDULE_static;
2507     CGOpenMPRuntime::StaticRTInput StaticInit(
2508         /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
2509         LB.getAddress(), UB.getAddress(), ST.getAddress());
2510     CGF.CGM.getOpenMPRuntime().emitForStaticInit(
2511         CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
2512     // UB = min(UB, GlobalUB);
2513     auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
2514     auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
2515         CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
2516     CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
2517     // IV = LB;
2518     CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
2519     // while (idx <= UB) { BODY; ++idx; }
2520     CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
2521                          [](CodeGenFunction &) {});
2522     // Tell the runtime we are done.
2523     auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2524       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
2525                                                      S.getDirectiveKind());
2526     };
2527     CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
2528     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
2529     // Emit post-update of the reduction variables if IsLastIter != 0.
2530     emitPostUpdateForReductionClause(
2531         CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2532           return CGF.Builder.CreateIsNotNull(
2533               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2534         });
2535 
2536     // Emit final copy of the lastprivate variables if IsLastIter != 0.
2537     if (HasLastprivates)
2538       CGF.EmitOMPLastprivateClauseFinal(
2539           S, /*NoFinals=*/false,
2540           CGF.Builder.CreateIsNotNull(
2541               CGF.EmitLoadOfScalar(IL, S.getLocStart())));
2542   };
2543 
2544   bool HasCancel = false;
2545   if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
2546     HasCancel = OSD->hasCancel();
2547   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
2548     HasCancel = OPSD->hasCancel();
2549   OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
2550   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
2551                                               HasCancel);
2552   // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
2553   // clause. Otherwise the barrier will be generated by the codegen for the
2554   // directive.
2555   if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
2556     // Emit implicit barrier to synchronize threads and avoid data races on
2557     // initialization of firstprivate variables.
2558     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2559                                            OMPD_unknown);
2560   }
2561 }
2562 
2563 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2564   {
2565     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2566     EmitSections(S);
2567   }
2568   // Emit an implicit barrier at the end.
2569   if (!S.getSingleClause<OMPNowaitClause>()) {
2570     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2571                                            OMPD_sections);
2572   }
2573 }
2574 
2575 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2576   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2577     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2578   };
2579   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2580   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2581                                               S.hasCancel());
2582 }
2583 
2584 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2585   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2586   llvm::SmallVector<const Expr *, 8> DestExprs;
2587   llvm::SmallVector<const Expr *, 8> SrcExprs;
2588   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2589   // Check if there are any 'copyprivate' clauses associated with this
2590   // 'single' construct.
2591   // Build a list of copyprivate variables along with helper expressions
2592   // (<source>, <destination>, <destination>=<source> expressions)
2593   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2594     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2595     DestExprs.append(C->destination_exprs().begin(),
2596                      C->destination_exprs().end());
2597     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2598     AssignmentOps.append(C->assignment_ops().begin(),
2599                          C->assignment_ops().end());
2600   }
2601   // Emit code for 'single' region along with 'copyprivate' clauses
2602   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2603     Action.Enter(CGF);
2604     OMPPrivateScope SingleScope(CGF);
2605     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2606     CGF.EmitOMPPrivateClause(S, SingleScope);
2607     (void)SingleScope.Privatize();
2608     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2609   };
2610   {
2611     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2612     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2613                                             CopyprivateVars, DestExprs,
2614                                             SrcExprs, AssignmentOps);
2615   }
2616   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2617   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2618   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2619     CGM.getOpenMPRuntime().emitBarrierCall(
2620         *this, S.getLocStart(),
2621         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2622   }
2623 }
2624 
2625 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2626   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2627     Action.Enter(CGF);
2628     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2629   };
2630   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2631   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2632 }
2633 
2634 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2635   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2636     Action.Enter(CGF);
2637     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2638   };
2639   Expr *Hint = nullptr;
2640   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2641     Hint = HintClause->getHint();
2642   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2643   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2644                                             S.getDirectiveName().getAsString(),
2645                                             CodeGen, S.getLocStart(), Hint);
2646 }
2647 
2648 void CodeGenFunction::EmitOMPParallelForDirective(
2649     const OMPParallelForDirective &S) {
2650   // Emit directive as a combined directive that consists of two implicit
2651   // directives: 'parallel' with 'for' directive.
2652   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2653     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2654     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2655                                emitDispatchForLoopBounds);
2656   };
2657   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2658                                  emitEmptyBoundParameters);
2659 }
2660 
2661 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2662     const OMPParallelForSimdDirective &S) {
2663   // Emit directive as a combined directive that consists of two implicit
2664   // directives: 'parallel' with 'for' directive.
2665   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2666     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2667                                emitDispatchForLoopBounds);
2668   };
2669   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2670                                  emitEmptyBoundParameters);
2671 }
2672 
2673 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2674     const OMPParallelSectionsDirective &S) {
2675   // Emit directive as a combined directive that consists of two implicit
2676   // directives: 'parallel' with 'sections' directive.
2677   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2678     CGF.EmitSections(S);
2679   };
2680   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2681                                  emitEmptyBoundParameters);
2682 }
2683 
2684 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
2685                                                 const RegionCodeGenTy &BodyGen,
2686                                                 const TaskGenTy &TaskGen,
2687                                                 OMPTaskDataTy &Data) {
2688   // Emit outlined function for task construct.
2689   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2690   auto *I = CS->getCapturedDecl()->param_begin();
2691   auto *PartId = std::next(I);
2692   auto *TaskT = std::next(I, 4);
2693   // Check if the task is final
2694   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2695     // If the condition constant folds and can be elided, try to avoid emitting
2696     // the condition and the dead arm of the if/else.
2697     auto *Cond = Clause->getCondition();
2698     bool CondConstant;
2699     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2700       Data.Final.setInt(CondConstant);
2701     else
2702       Data.Final.setPointer(EvaluateExprAsBool(Cond));
2703   } else {
2704     // By default the task is not final.
2705     Data.Final.setInt(/*IntVal=*/false);
2706   }
2707   // Check if the task has 'priority' clause.
2708   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2709     auto *Prio = Clause->getPriority();
2710     Data.Priority.setInt(/*IntVal=*/true);
2711     Data.Priority.setPointer(EmitScalarConversion(
2712         EmitScalarExpr(Prio), Prio->getType(),
2713         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2714         Prio->getExprLoc()));
2715   }
2716   // The first function argument for tasks is a thread id, the second one is a
2717   // part id (0 for tied tasks, >=0 for untied task).
2718   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2719   // Get list of private variables.
2720   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2721     auto IRef = C->varlist_begin();
2722     for (auto *IInit : C->private_copies()) {
2723       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2724       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2725         Data.PrivateVars.push_back(*IRef);
2726         Data.PrivateCopies.push_back(IInit);
2727       }
2728       ++IRef;
2729     }
2730   }
2731   EmittedAsPrivate.clear();
2732   // Get list of firstprivate variables.
2733   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2734     auto IRef = C->varlist_begin();
2735     auto IElemInitRef = C->inits().begin();
2736     for (auto *IInit : C->private_copies()) {
2737       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2738       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2739         Data.FirstprivateVars.push_back(*IRef);
2740         Data.FirstprivateCopies.push_back(IInit);
2741         Data.FirstprivateInits.push_back(*IElemInitRef);
2742       }
2743       ++IRef;
2744       ++IElemInitRef;
2745     }
2746   }
2747   // Get list of lastprivate variables (for taskloops).
2748   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2749   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2750     auto IRef = C->varlist_begin();
2751     auto ID = C->destination_exprs().begin();
2752     for (auto *IInit : C->private_copies()) {
2753       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2754       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2755         Data.LastprivateVars.push_back(*IRef);
2756         Data.LastprivateCopies.push_back(IInit);
2757       }
2758       LastprivateDstsOrigs.insert(
2759           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2760            cast<DeclRefExpr>(*IRef)});
2761       ++IRef;
2762       ++ID;
2763     }
2764   }
2765   SmallVector<const Expr *, 4> LHSs;
2766   SmallVector<const Expr *, 4> RHSs;
2767   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
2768     auto IPriv = C->privates().begin();
2769     auto IRed = C->reduction_ops().begin();
2770     auto ILHS = C->lhs_exprs().begin();
2771     auto IRHS = C->rhs_exprs().begin();
2772     for (const auto *Ref : C->varlists()) {
2773       Data.ReductionVars.emplace_back(Ref);
2774       Data.ReductionCopies.emplace_back(*IPriv);
2775       Data.ReductionOps.emplace_back(*IRed);
2776       LHSs.emplace_back(*ILHS);
2777       RHSs.emplace_back(*IRHS);
2778       std::advance(IPriv, 1);
2779       std::advance(IRed, 1);
2780       std::advance(ILHS, 1);
2781       std::advance(IRHS, 1);
2782     }
2783   }
2784   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
2785       *this, S.getLocStart(), LHSs, RHSs, Data);
2786   // Build list of dependences.
2787   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2788     for (auto *IRef : C->varlists())
2789       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
2790   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
2791       CodeGenFunction &CGF, PrePostActionTy &Action) {
2792     // Set proper addresses for generated private copies.
2793     OMPPrivateScope Scope(CGF);
2794     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
2795         !Data.LastprivateVars.empty()) {
2796       enum { PrivatesParam = 2, CopyFnParam = 3 };
2797       auto *CopyFn = CGF.Builder.CreateLoad(
2798           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
2799       auto *PrivatesPtr = CGF.Builder.CreateLoad(
2800           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
2801       // Map privates.
2802       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2803       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2804       CallArgs.push_back(PrivatesPtr);
2805       for (auto *E : Data.PrivateVars) {
2806         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2807         Address PrivatePtr = CGF.CreateMemTemp(
2808             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2809         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2810         CallArgs.push_back(PrivatePtr.getPointer());
2811       }
2812       for (auto *E : Data.FirstprivateVars) {
2813         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2814         Address PrivatePtr =
2815             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2816                               ".firstpriv.ptr.addr");
2817         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2818         CallArgs.push_back(PrivatePtr.getPointer());
2819       }
2820       for (auto *E : Data.LastprivateVars) {
2821         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2822         Address PrivatePtr =
2823             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2824                               ".lastpriv.ptr.addr");
2825         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2826         CallArgs.push_back(PrivatePtr.getPointer());
2827       }
2828       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
2829                                                           CopyFn, CallArgs);
2830       for (auto &&Pair : LastprivateDstsOrigs) {
2831         auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2832         DeclRefExpr DRE(
2833             const_cast<VarDecl *>(OrigVD),
2834             /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2835                 OrigVD) != nullptr,
2836             Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2837         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2838           return CGF.EmitLValue(&DRE).getAddress();
2839         });
2840       }
2841       for (auto &&Pair : PrivatePtrs) {
2842         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2843                             CGF.getContext().getDeclAlign(Pair.first));
2844         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2845       }
2846     }
2847     if (Data.Reductions) {
2848       OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
2849       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2850                              Data.ReductionOps);
2851       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2852           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2853       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2854         RedCG.emitSharedLValue(CGF, Cnt);
2855         RedCG.emitAggregateType(CGF, Cnt);
2856         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2857             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2858         Replacement =
2859             Address(CGF.EmitScalarConversion(
2860                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2861                         CGF.getContext().getPointerType(
2862                             Data.ReductionCopies[Cnt]->getType()),
2863                         SourceLocation()),
2864                     Replacement.getAlignment());
2865         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2866         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2867                          [Replacement]() { return Replacement; });
2868         // FIXME: This must removed once the runtime library is fixed.
2869         // Emit required threadprivate variables for
2870         // initilizer/combiner/finalizer.
2871         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2872                                                            RedCG, Cnt);
2873       }
2874     }
2875     // Privatize all private variables except for in_reduction items.
2876     (void)Scope.Privatize();
2877     SmallVector<const Expr *, 4> InRedVars;
2878     SmallVector<const Expr *, 4> InRedPrivs;
2879     SmallVector<const Expr *, 4> InRedOps;
2880     SmallVector<const Expr *, 4> TaskgroupDescriptors;
2881     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
2882       auto IPriv = C->privates().begin();
2883       auto IRed = C->reduction_ops().begin();
2884       auto ITD = C->taskgroup_descriptors().begin();
2885       for (const auto *Ref : C->varlists()) {
2886         InRedVars.emplace_back(Ref);
2887         InRedPrivs.emplace_back(*IPriv);
2888         InRedOps.emplace_back(*IRed);
2889         TaskgroupDescriptors.emplace_back(*ITD);
2890         std::advance(IPriv, 1);
2891         std::advance(IRed, 1);
2892         std::advance(ITD, 1);
2893       }
2894     }
2895     // Privatize in_reduction items here, because taskgroup descriptors must be
2896     // privatized earlier.
2897     OMPPrivateScope InRedScope(CGF);
2898     if (!InRedVars.empty()) {
2899       ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
2900       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
2901         RedCG.emitSharedLValue(CGF, Cnt);
2902         RedCG.emitAggregateType(CGF, Cnt);
2903         // The taskgroup descriptor variable is always implicit firstprivate and
2904         // privatized already during procoessing of the firstprivates.
2905         llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
2906             CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
2907         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2908             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2909         Replacement = Address(
2910             CGF.EmitScalarConversion(
2911                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2912                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
2913                 SourceLocation()),
2914             Replacement.getAlignment());
2915         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2916         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
2917                               [Replacement]() { return Replacement; });
2918         // FIXME: This must removed once the runtime library is fixed.
2919         // Emit required threadprivate variables for
2920         // initilizer/combiner/finalizer.
2921         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2922                                                            RedCG, Cnt);
2923       }
2924     }
2925     (void)InRedScope.Privatize();
2926 
2927     Action.Enter(CGF);
2928     BodyGen(CGF);
2929   };
2930   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2931       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
2932       Data.NumberOfParts);
2933   OMPLexicalScope Scope(*this, S);
2934   TaskGen(*this, OutlinedFn, Data);
2935 }
2936 
2937 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
2938   // Emit outlined function for task construct.
2939   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2940   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
2941   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
2942   const Expr *IfCond = nullptr;
2943   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2944     if (C->getNameModifier() == OMPD_unknown ||
2945         C->getNameModifier() == OMPD_task) {
2946       IfCond = C->getCondition();
2947       break;
2948     }
2949   }
2950 
2951   OMPTaskDataTy Data;
2952   // Check if we should emit tied or untied task.
2953   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
2954   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
2955     CGF.EmitStmt(CS->getCapturedStmt());
2956   };
2957   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
2958                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
2959                             const OMPTaskDataTy &Data) {
2960     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
2961                                             SharedsTy, CapturedStruct, IfCond,
2962                                             Data);
2963   };
2964   EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
2965 }
2966 
2967 void CodeGenFunction::EmitOMPTaskyieldDirective(
2968     const OMPTaskyieldDirective &S) {
2969   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2970 }
2971 
2972 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2973   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2974 }
2975 
2976 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2977   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2978 }
2979 
2980 void CodeGenFunction::EmitOMPTaskgroupDirective(
2981     const OMPTaskgroupDirective &S) {
2982   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2983     Action.Enter(CGF);
2984     if (const Expr *E = S.getReductionRef()) {
2985       SmallVector<const Expr *, 4> LHSs;
2986       SmallVector<const Expr *, 4> RHSs;
2987       OMPTaskDataTy Data;
2988       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
2989         auto IPriv = C->privates().begin();
2990         auto IRed = C->reduction_ops().begin();
2991         auto ILHS = C->lhs_exprs().begin();
2992         auto IRHS = C->rhs_exprs().begin();
2993         for (const auto *Ref : C->varlists()) {
2994           Data.ReductionVars.emplace_back(Ref);
2995           Data.ReductionCopies.emplace_back(*IPriv);
2996           Data.ReductionOps.emplace_back(*IRed);
2997           LHSs.emplace_back(*ILHS);
2998           RHSs.emplace_back(*IRHS);
2999           std::advance(IPriv, 1);
3000           std::advance(IRed, 1);
3001           std::advance(ILHS, 1);
3002           std::advance(IRHS, 1);
3003         }
3004       }
3005       llvm::Value *ReductionDesc =
3006           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(),
3007                                                            LHSs, RHSs, Data);
3008       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3009       CGF.EmitVarDecl(*VD);
3010       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
3011                             /*Volatile=*/false, E->getType());
3012     }
3013     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3014   };
3015   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3016   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
3017 }
3018 
3019 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
3020   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
3021     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
3022       return llvm::makeArrayRef(FlushClause->varlist_begin(),
3023                                 FlushClause->varlist_end());
3024     }
3025     return llvm::None;
3026   }(), S.getLocStart());
3027 }
3028 
/// Emit the loop of a 'distribute'-based directive.
///
/// Emits the iteration variable and (when it is a variable) the iteration
/// count, checks the loop precondition, sets up the helper variables and the
/// privatization scope, and then emits either the static non-chunked form
/// (a single inner loop between static init/finish runtime calls) or the outer
/// loop form for any other schedule. Lastprivate variables are copied back at
/// the end when such a clause is present.
///
/// \param S Loop directive being emitted.
/// \param CodeGenLoop Callback that emits the loop body (or the rest of a
/// combined pragma) for one chunk.
/// \param IncExpr Increment expression used by the inner loop.
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // Stays null when the precondition folds to a constant, in which case no
    // continuation block is needed.
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits.

      // Directives that share loop bounds use the combined lower/upper bound
      // variables; all others use the plain ones.
      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
          *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          // The chunk size is converted to the type of the iteration variable.
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
          S.getIterationVariable()->getType(),
          S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
            LB.getAddress(), UB.getAddress(), ST.getAddress());
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                                    StaticInit);
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                         ? S.getCombinedCond()
                         : S.getCond();

        // for distribute alone,  codegen
        // while (idx <= UB) { BODY; ++idx; }
        // when combined with 'for' (e.g. as in 'distribute parallel for')
        // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
                         [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                           CodeGenLoop(CGF, S, LoopExit);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }

      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(
                EmitLoadOfScalar(IL, S.getLocStart())));
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}
3184 
3185 void CodeGenFunction::EmitOMPDistributeDirective(
3186     const OMPDistributeDirective &S) {
3187   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3188 
3189     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3190   };
3191   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3192   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
3193                                               false);
3194 }
3195 
3196 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3197                                                    const CapturedStmt *S) {
3198   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3199   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3200   CGF.CapturedStmtInfo = &CapStmtInfo;
3201   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3202   Fn->addFnAttr(llvm::Attribute::NoInline);
3203   return Fn;
3204 }
3205 
3206 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
3207   if (!S.getAssociatedStmt()) {
3208     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
3209       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
3210     return;
3211   }
3212   auto *C = S.getSingleClause<OMPSIMDClause>();
3213   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
3214                                  PrePostActionTy &Action) {
3215     if (C) {
3216       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3217       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3218       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3219       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
3220       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
3221                                                       OutlinedFn, CapturedVars);
3222     } else {
3223       Action.Enter(CGF);
3224       CGF.EmitStmt(
3225           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3226     }
3227   };
3228   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3229   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
3230 }
3231 
3232 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3233                                          QualType SrcType, QualType DestType,
3234                                          SourceLocation Loc) {
3235   assert(CGF.hasScalarEvaluationKind(DestType) &&
3236          "DestType must have scalar evaluation kind.");
3237   assert(!Val.isAggregate() && "Must be a scalar or complex.");
3238   return Val.isScalar()
3239              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
3240                                         Loc)
3241              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
3242                                                  DestType, Loc);
3243 }
3244 
3245 static CodeGenFunction::ComplexPairTy
3246 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3247                       QualType DestType, SourceLocation Loc) {
3248   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3249          "DestType must have complex evaluation kind.");
3250   CodeGenFunction::ComplexPairTy ComplexVal;
3251   if (Val.isScalar()) {
3252     // Convert the input element to the element type of the complex.
3253     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3254     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3255                                               DestElementType, Loc);
3256     ComplexVal = CodeGenFunction::ComplexPairTy(
3257         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3258   } else {
3259     assert(Val.isComplex() && "Must be a scalar or complex.");
3260     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3261     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3262     ComplexVal.first = CGF.EmitScalarConversion(
3263         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3264     ComplexVal.second = CGF.EmitScalarConversion(
3265         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3266   }
3267   return ComplexVal;
3268 }
3269 
3270 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3271                                   LValue LVal, RValue RVal) {
3272   if (LVal.isGlobalReg()) {
3273     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3274   } else {
3275     CGF.EmitAtomicStore(RVal, LVal,
3276                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3277                                  : llvm::AtomicOrdering::Monotonic,
3278                         LVal.isVolatile(), /*IsInit=*/false);
3279   }
3280 }
3281 
3282 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
3283                                          QualType RValTy, SourceLocation Loc) {
3284   switch (getEvaluationKind(LVal.getType())) {
3285   case TEK_Scalar:
3286     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
3287                                *this, RVal, RValTy, LVal.getType(), Loc)),
3288                            LVal);
3289     break;
3290   case TEK_Complex:
3291     EmitStoreOfComplex(
3292         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
3293         /*isInit=*/false);
3294     break;
3295   case TEK_Aggregate:
3296     llvm_unreachable("Must be a scalar or complex.");
3297   }
3298 }
3299 
3300 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3301                                   const Expr *X, const Expr *V,
3302                                   SourceLocation Loc) {
3303   // v = x;
3304   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3305   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3306   LValue XLValue = CGF.EmitLValue(X);
3307   LValue VLValue = CGF.EmitLValue(V);
3308   RValue Res = XLValue.isGlobalReg()
3309                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
3310                    : CGF.EmitAtomicLoad(
3311                          XLValue, Loc,
3312                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3313                                   : llvm::AtomicOrdering::Monotonic,
3314                          XLValue.isVolatile());
3315   // OpenMP, 2.12.6, atomic Construct
3316   // Any atomic construct with a seq_cst clause forces the atomically
3317   // performed operation to include an implicit flush operation without a
3318   // list.
3319   if (IsSeqCst)
3320     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3321   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3322 }
3323 
3324 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3325                                    const Expr *X, const Expr *E,
3326                                    SourceLocation Loc) {
3327   // x = expr;
3328   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3329   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3330   // OpenMP, 2.12.6, atomic Construct
3331   // Any atomic construct with a seq_cst clause forces the atomically
3332   // performed operation to include an implicit flush operation without a
3333   // list.
3334   if (IsSeqCst)
3335     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3336 }
3337 
3338 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
3339                                                 RValue Update,
3340                                                 BinaryOperatorKind BO,
3341                                                 llvm::AtomicOrdering AO,
3342                                                 bool IsXLHSInRHSPart) {
3343   auto &Context = CGF.CGM.getContext();
3344   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
3345   // expression is simple and atomic is allowed for the given type for the
3346   // target platform.
3347   if (BO == BO_Comma || !Update.isScalar() ||
3348       !Update.getScalarVal()->getType()->isIntegerTy() ||
3349       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
3350                         (Update.getScalarVal()->getType() !=
3351                          X.getAddress().getElementType())) ||
3352       !X.getAddress().getElementType()->isIntegerTy() ||
3353       !Context.getTargetInfo().hasBuiltinAtomic(
3354           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
3355     return std::make_pair(false, RValue::get(nullptr));
3356 
3357   llvm::AtomicRMWInst::BinOp RMWOp;
3358   switch (BO) {
3359   case BO_Add:
3360     RMWOp = llvm::AtomicRMWInst::Add;
3361     break;
3362   case BO_Sub:
3363     if (!IsXLHSInRHSPart)
3364       return std::make_pair(false, RValue::get(nullptr));
3365     RMWOp = llvm::AtomicRMWInst::Sub;
3366     break;
3367   case BO_And:
3368     RMWOp = llvm::AtomicRMWInst::And;
3369     break;
3370   case BO_Or:
3371     RMWOp = llvm::AtomicRMWInst::Or;
3372     break;
3373   case BO_Xor:
3374     RMWOp = llvm::AtomicRMWInst::Xor;
3375     break;
3376   case BO_LT:
3377     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3378                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
3379                                    : llvm::AtomicRMWInst::Max)
3380                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
3381                                    : llvm::AtomicRMWInst::UMax);
3382     break;
3383   case BO_GT:
3384     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3385                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
3386                                    : llvm::AtomicRMWInst::Min)
3387                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
3388                                    : llvm::AtomicRMWInst::UMin);
3389     break;
3390   case BO_Assign:
3391     RMWOp = llvm::AtomicRMWInst::Xchg;
3392     break;
3393   case BO_Mul:
3394   case BO_Div:
3395   case BO_Rem:
3396   case BO_Shl:
3397   case BO_Shr:
3398   case BO_LAnd:
3399   case BO_LOr:
3400     return std::make_pair(false, RValue::get(nullptr));
3401   case BO_PtrMemD:
3402   case BO_PtrMemI:
3403   case BO_LE:
3404   case BO_GE:
3405   case BO_EQ:
3406   case BO_NE:
3407   case BO_AddAssign:
3408   case BO_SubAssign:
3409   case BO_AndAssign:
3410   case BO_OrAssign:
3411   case BO_XorAssign:
3412   case BO_MulAssign:
3413   case BO_DivAssign:
3414   case BO_RemAssign:
3415   case BO_ShlAssign:
3416   case BO_ShrAssign:
3417   case BO_Comma:
3418     llvm_unreachable("Unsupported atomic update operation");
3419   }
3420   auto *UpdateVal = Update.getScalarVal();
3421   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3422     UpdateVal = CGF.Builder.CreateIntCast(
3423         IC, X.getAddress().getElementType(),
3424         X.getType()->hasSignedIntegerRepresentation());
3425   }
3426   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3427   return std::make_pair(true, RValue::get(Res));
3428 }
3429 
3430 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3431     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3432     llvm::AtomicOrdering AO, SourceLocation Loc,
3433     const llvm::function_ref<RValue(RValue)> &CommonGen) {
3434   // Update expressions are allowed to have the following forms:
3435   // x binop= expr; -> xrval + expr;
3436   // x++, ++x -> xrval + 1;
3437   // x--, --x -> xrval - 1;
3438   // x = x binop expr; -> xrval binop expr
3439   // x = expr Op x; - > expr binop xrval;
3440   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3441   if (!Res.first) {
3442     if (X.isGlobalReg()) {
3443       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3444       // 'xrval'.
3445       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3446     } else {
3447       // Perform compare-and-swap procedure.
3448       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3449     }
3450   }
3451   return Res;
3452 }
3453 
3454 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3455                                     const Expr *X, const Expr *E,
3456                                     const Expr *UE, bool IsXLHSInRHSPart,
3457                                     SourceLocation Loc) {
3458   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3459          "Update expr in 'atomic update' must be a binary operator.");
3460   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3461   // Update expressions are allowed to have the following forms:
3462   // x binop= expr; -> xrval + expr;
3463   // x++, ++x -> xrval + 1;
3464   // x--, --x -> xrval - 1;
3465   // x = x binop expr; -> xrval binop expr
3466   // x = expr Op x; - > expr binop xrval;
3467   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3468   LValue XLValue = CGF.EmitLValue(X);
3469   RValue ExprRValue = CGF.EmitAnyExpr(E);
3470   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3471                      : llvm::AtomicOrdering::Monotonic;
3472   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3473   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3474   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3475   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3476   auto Gen =
3477       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3478         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3479         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3480         return CGF.EmitAnyExpr(UE);
3481       };
3482   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3483       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3484   // OpenMP, 2.12.6, atomic Construct
3485   // Any atomic construct with a seq_cst clause forces the atomically
3486   // performed operation to include an implicit flush operation without a
3487   // list.
3488   if (IsSeqCst)
3489     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3490 }
3491 
3492 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3493                             QualType SourceType, QualType ResType,
3494                             SourceLocation Loc) {
3495   switch (CGF.getEvaluationKind(ResType)) {
3496   case TEK_Scalar:
3497     return RValue::get(
3498         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3499   case TEK_Complex: {
3500     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3501     return RValue::getComplex(Res.first, Res.second);
3502   }
3503   case TEK_Aggregate:
3504     break;
3505   }
3506   llvm_unreachable("Must be a scalar or complex.");
3507 }
3508 
3509 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
3510                                      bool IsPostfixUpdate, const Expr *V,
3511                                      const Expr *X, const Expr *E,
3512                                      const Expr *UE, bool IsXLHSInRHSPart,
3513                                      SourceLocation Loc) {
3514   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
3515   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
3516   RValue NewVVal;
3517   LValue VLValue = CGF.EmitLValue(V);
3518   LValue XLValue = CGF.EmitLValue(X);
3519   RValue ExprRValue = CGF.EmitAnyExpr(E);
3520   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3521                      : llvm::AtomicOrdering::Monotonic;
3522   QualType NewVValType;
3523   if (UE) {
3524     // 'x' is updated with some additional value.
3525     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3526            "Update expr in 'atomic capture' must be a binary operator.");
3527     auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3528     // Update expressions are allowed to have the following forms:
3529     // x binop= expr; -> xrval + expr;
3530     // x++, ++x -> xrval + 1;
3531     // x--, --x -> xrval - 1;
3532     // x = x binop expr; -> xrval binop expr
3533     // x = expr Op x; - > expr binop xrval;
3534     auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3535     auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3536     auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3537     NewVValType = XRValExpr->getType();
3538     auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3539     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
3540                   IsPostfixUpdate](RValue XRValue) -> RValue {
3541       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3542       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3543       RValue Res = CGF.EmitAnyExpr(UE);
3544       NewVVal = IsPostfixUpdate ? XRValue : Res;
3545       return Res;
3546     };
3547     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3548         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3549     if (Res.first) {
3550       // 'atomicrmw' instruction was generated.
3551       if (IsPostfixUpdate) {
3552         // Use old value from 'atomicrmw'.
3553         NewVVal = Res.second;
3554       } else {
3555         // 'atomicrmw' does not provide new value, so evaluate it using old
3556         // value of 'x'.
3557         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3558         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
3559         NewVVal = CGF.EmitAnyExpr(UE);
3560       }
3561     }
3562   } else {
3563     // 'x' is simply rewritten with some 'expr'.
3564     NewVValType = X->getType().getNonReferenceType();
3565     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
3566                                X->getType().getNonReferenceType(), Loc);
3567     auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
3568       NewVVal = XRValue;
3569       return ExprRValue;
3570     };
3571     // Try to perform atomicrmw xchg, otherwise simple exchange.
3572     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3573         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
3574         Loc, Gen);
3575     if (Res.first) {
3576       // 'atomicrmw' instruction was generated.
3577       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
3578     }
3579   }
3580   // Emit post-update store to 'v' of old/new 'x' value.
3581   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
3582   // OpenMP, 2.12.6, atomic Construct
3583   // Any atomic construct with a seq_cst clause forces the atomically
3584   // performed operation to include an implicit flush operation without a
3585   // list.
3586   if (IsSeqCst)
3587     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3588 }
3589 
3590 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
3591                               bool IsSeqCst, bool IsPostfixUpdate,
3592                               const Expr *X, const Expr *V, const Expr *E,
3593                               const Expr *UE, bool IsXLHSInRHSPart,
3594                               SourceLocation Loc) {
3595   switch (Kind) {
3596   case OMPC_read:
3597     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
3598     break;
3599   case OMPC_write:
3600     EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
3601     break;
3602   case OMPC_unknown:
3603   case OMPC_update:
3604     EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
3605     break;
3606   case OMPC_capture:
3607     EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
3608                              IsXLHSInRHSPart, Loc);
3609     break;
3610   case OMPC_if:
3611   case OMPC_final:
3612   case OMPC_num_threads:
3613   case OMPC_private:
3614   case OMPC_firstprivate:
3615   case OMPC_lastprivate:
3616   case OMPC_reduction:
3617   case OMPC_task_reduction:
3618   case OMPC_in_reduction:
3619   case OMPC_safelen:
3620   case OMPC_simdlen:
3621   case OMPC_collapse:
3622   case OMPC_default:
3623   case OMPC_seq_cst:
3624   case OMPC_shared:
3625   case OMPC_linear:
3626   case OMPC_aligned:
3627   case OMPC_copyin:
3628   case OMPC_copyprivate:
3629   case OMPC_flush:
3630   case OMPC_proc_bind:
3631   case OMPC_schedule:
3632   case OMPC_ordered:
3633   case OMPC_nowait:
3634   case OMPC_untied:
3635   case OMPC_threadprivate:
3636   case OMPC_depend:
3637   case OMPC_mergeable:
3638   case OMPC_device:
3639   case OMPC_threads:
3640   case OMPC_simd:
3641   case OMPC_map:
3642   case OMPC_num_teams:
3643   case OMPC_thread_limit:
3644   case OMPC_priority:
3645   case OMPC_grainsize:
3646   case OMPC_nogroup:
3647   case OMPC_num_tasks:
3648   case OMPC_hint:
3649   case OMPC_dist_schedule:
3650   case OMPC_defaultmap:
3651   case OMPC_uniform:
3652   case OMPC_to:
3653   case OMPC_from:
3654   case OMPC_use_device_ptr:
3655   case OMPC_is_device_ptr:
3656     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
3657   }
3658 }
3659 
3660 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3661   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3662   OpenMPClauseKind Kind = OMPC_unknown;
3663   for (auto *C : S.clauses()) {
3664     // Find first clause (skip seq_cst clause, if it is first).
3665     if (C->getClauseKind() != OMPC_seq_cst) {
3666       Kind = C->getClauseKind();
3667       break;
3668     }
3669   }
3670 
3671   const auto *CS =
3672       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
3673   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3674     enterFullExpression(EWC);
3675   }
3676   // Processing for statements under 'atomic capture'.
3677   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3678     for (const auto *C : Compound->body()) {
3679       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3680         enterFullExpression(EWC);
3681       }
3682     }
3683   }
3684 
3685   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3686                                             PrePostActionTy &) {
3687     CGF.EmitStopPoint(CS);
3688     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3689                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3690                       S.isXLHSInRHSPart(), S.getLocStart());
3691   };
3692   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3693   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3694 }
3695 
3696 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
3697                                          const OMPExecutableDirective &S,
3698                                          const RegionCodeGenTy &CodeGen) {
3699   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
3700   CodeGenModule &CGM = CGF.CGM;
3701   const CapturedStmt &CS = *S.getCapturedStmt(OMPD_target);
3702 
3703   llvm::Function *Fn = nullptr;
3704   llvm::Constant *FnID = nullptr;
3705 
3706   const Expr *IfCond = nullptr;
3707   // Check for the at most one if clause associated with the target region.
3708   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3709     if (C->getNameModifier() == OMPD_unknown ||
3710         C->getNameModifier() == OMPD_target) {
3711       IfCond = C->getCondition();
3712       break;
3713     }
3714   }
3715 
3716   // Check if we have any device clause associated with the directive.
3717   const Expr *Device = nullptr;
3718   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3719     Device = C->getDevice();
3720   }
3721 
3722   // Check if we have an if clause whose conditional always evaluates to false
3723   // or if we do not have any targets specified. If so the target region is not
3724   // an offload entry point.
3725   bool IsOffloadEntry = true;
3726   if (IfCond) {
3727     bool Val;
3728     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3729       IsOffloadEntry = false;
3730   }
3731   if (CGM.getLangOpts().OMPTargetTriples.empty())
3732     IsOffloadEntry = false;
3733 
3734   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
3735   StringRef ParentName;
3736   // In case we have Ctors/Dtors we use the complete type variant to produce
3737   // the mangling of the device outlined kernel.
3738   if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
3739     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3740   else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
3741     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3742   else
3743     ParentName =
3744         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
3745 
3746   // Emit target region as a standalone region.
3747   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
3748                                                     IsOffloadEntry, CodeGen);
3749   OMPLexicalScope Scope(CGF, S);
3750   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3751   CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
3752   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
3753                                         CapturedVars);
3754 }
3755 
3756 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
3757                              PrePostActionTy &Action) {
3758   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3759   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3760   CGF.EmitOMPPrivateClause(S, PrivateScope);
3761   (void)PrivateScope.Privatize();
3762 
3763   Action.Enter(CGF);
3764   CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3765 }
3766 
3767 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
3768                                                   StringRef ParentName,
3769                                                   const OMPTargetDirective &S) {
3770   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3771     emitTargetRegion(CGF, S, Action);
3772   };
3773   llvm::Function *Fn;
3774   llvm::Constant *Addr;
3775   // Emit target region as a standalone region.
3776   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3777       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3778   assert(Fn && Addr && "Target device function emission failed.");
3779 }
3780 
3781 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3782   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3783     emitTargetRegion(CGF, S, Action);
3784   };
3785   emitCommonOMPTargetDirective(*this, S, CodeGen);
3786 }
3787 
3788 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3789                                         const OMPExecutableDirective &S,
3790                                         OpenMPDirectiveKind InnermostKind,
3791                                         const RegionCodeGenTy &CodeGen) {
3792   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
3793   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
3794       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3795 
3796   const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
3797   const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
3798   if (NT || TL) {
3799     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
3800     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
3801 
3802     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
3803                                                   S.getLocStart());
3804   }
3805 
3806   OMPTeamsScope Scope(CGF, S);
3807   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3808   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3809   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
3810                                            CapturedVars);
3811 }
3812 
3813 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
3814   // Emit teams region as a standalone region.
3815   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3816     OMPPrivateScope PrivateScope(CGF);
3817     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3818     CGF.EmitOMPPrivateClause(S, PrivateScope);
3819     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3820     (void)PrivateScope.Privatize();
3821     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3822     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3823   };
3824   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
3825   emitPostUpdateForReductionClause(
3826       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
3827 }
3828 
3829 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
3830                                   const OMPTargetTeamsDirective &S) {
3831   auto *CS = S.getCapturedStmt(OMPD_teams);
3832   Action.Enter(CGF);
3833   // Emit teams region as a standalone region.
3834   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
3835     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3836     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3837     CGF.EmitOMPPrivateClause(S, PrivateScope);
3838     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3839     (void)PrivateScope.Privatize();
3840     Action.Enter(CGF);
3841     CGF.EmitStmt(CS->getCapturedStmt());
3842     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3843   };
3844   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
3845   emitPostUpdateForReductionClause(
3846       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
3847 }
3848 
3849 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
3850     CodeGenModule &CGM, StringRef ParentName,
3851     const OMPTargetTeamsDirective &S) {
3852   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3853     emitTargetTeamsRegion(CGF, Action, S);
3854   };
3855   llvm::Function *Fn;
3856   llvm::Constant *Addr;
3857   // Emit target region as a standalone region.
3858   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3859       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3860   assert(Fn && Addr && "Target device function emission failed.");
3861 }
3862 
3863 void CodeGenFunction::EmitOMPTargetTeamsDirective(
3864     const OMPTargetTeamsDirective &S) {
3865   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3866     emitTargetTeamsRegion(CGF, Action, S);
3867   };
3868   emitCommonOMPTargetDirective(*this, S, CodeGen);
3869 }
3870 
3871 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
3872     const OMPTeamsDistributeDirective &S) {
3873 
3874   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3875     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3876   };
3877 
3878   // Emit teams region as a standalone region.
3879   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
3880                                             PrePostActionTy &) {
3881     OMPPrivateScope PrivateScope(CGF);
3882     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3883     (void)PrivateScope.Privatize();
3884     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
3885                                                     CodeGenDistribute);
3886     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3887   };
3888   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3889   emitPostUpdateForReductionClause(*this, S,
3890                                    [](CodeGenFunction &) { return nullptr; });
3891 }
3892 
3893 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
3894     const OMPTeamsDistributeParallelForDirective &S) {
3895   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3896     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3897                               S.getDistInc());
3898   };
3899 
3900   // Emit teams region as a standalone region.
3901   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
3902                                             PrePostActionTy &) {
3903     OMPPrivateScope PrivateScope(CGF);
3904     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3905     (void)PrivateScope.Privatize();
3906     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
3907                                                     CodeGenDistribute);
3908     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3909   };
3910   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
3911   emitPostUpdateForReductionClause(*this, S,
3912                                    [](CodeGenFunction &) { return nullptr; });
3913 }
3914 
3915 void CodeGenFunction::EmitOMPCancellationPointDirective(
3916     const OMPCancellationPointDirective &S) {
3917   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
3918                                                    S.getCancelRegion());
3919 }
3920 
3921 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
3922   const Expr *IfCond = nullptr;
3923   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3924     if (C->getNameModifier() == OMPD_unknown ||
3925         C->getNameModifier() == OMPD_cancel) {
3926       IfCond = C->getCondition();
3927       break;
3928     }
3929   }
3930   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
3931                                         S.getCancelRegion());
3932 }
3933 
3934 CodeGenFunction::JumpDest
3935 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
3936   if (Kind == OMPD_parallel || Kind == OMPD_task ||
3937       Kind == OMPD_target_parallel)
3938     return ReturnBlock;
3939   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
3940          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
3941          Kind == OMPD_distribute_parallel_for ||
3942          Kind == OMPD_target_parallel_for);
3943   return OMPCancelStack.getExitBlock();
3944 }
3945 
3946 void CodeGenFunction::EmitOMPUseDevicePtrClause(
3947     const OMPClause &NC, OMPPrivateScope &PrivateScope,
3948     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
3949   const auto &C = cast<OMPUseDevicePtrClause>(NC);
3950   auto OrigVarIt = C.varlist_begin();
3951   auto InitIt = C.inits().begin();
3952   for (auto PvtVarIt : C.private_copies()) {
3953     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
3954     auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
3955     auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
3956 
3957     // In order to identify the right initializer we need to match the
3958     // declaration used by the mapping logic. In some cases we may get
3959     // OMPCapturedExprDecl that refers to the original declaration.
3960     const ValueDecl *MatchingVD = OrigVD;
3961     if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
3962       // OMPCapturedExprDecl are used to privative fields of the current
3963       // structure.
3964       auto *ME = cast<MemberExpr>(OED->getInit());
3965       assert(isa<CXXThisExpr>(ME->getBase()) &&
3966              "Base should be the current struct!");
3967       MatchingVD = ME->getMemberDecl();
3968     }
3969 
3970     // If we don't have information about the current list item, move on to
3971     // the next one.
3972     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
3973     if (InitAddrIt == CaptureDeviceAddrMap.end())
3974       continue;
3975 
3976     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
3977       // Initialize the temporary initialization variable with the address we
3978       // get from the runtime library. We have to cast the source address
3979       // because it is always a void *. References are materialized in the
3980       // privatization scope, so the initialization here disregards the fact
3981       // the original variable is a reference.
3982       QualType AddrQTy =
3983           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
3984       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
3985       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
3986       setAddrOfLocalVar(InitVD, InitAddr);
3987 
3988       // Emit private declaration, it will be initialized by the value we
3989       // declaration we just added to the local declarations map.
3990       EmitDecl(*PvtVD);
3991 
3992       // The initialization variables reached its purpose in the emission
3993       // ofthe previous declaration, so we don't need it anymore.
3994       LocalDeclMap.erase(InitVD);
3995 
3996       // Return the address of the private variable.
3997       return GetAddrOfLocalVar(PvtVD);
3998     });
3999     assert(IsRegistered && "firstprivate var already registered as private");
4000     // Silence the warning about unused variable.
4001     (void)IsRegistered;
4002 
4003     ++OrigVarIt;
4004     ++InitIt;
4005   }
4006 }
4007 
4008 // Generate the instructions for '#pragma omp target data' directive.
4009 void CodeGenFunction::EmitOMPTargetDataDirective(
4010     const OMPTargetDataDirective &S) {
4011   CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
4012 
4013   // Create a pre/post action to signal the privatization of the device pointer.
4014   // This action can be replaced by the OpenMP runtime code generation to
4015   // deactivate privatization.
4016   bool PrivatizeDevicePointers = false;
4017   class DevicePointerPrivActionTy : public PrePostActionTy {
4018     bool &PrivatizeDevicePointers;
4019 
4020   public:
4021     explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
4022         : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
4023     void Enter(CodeGenFunction &CGF) override {
4024       PrivatizeDevicePointers = true;
4025     }
4026   };
4027   DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
4028 
4029   auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
4030       CodeGenFunction &CGF, PrePostActionTy &Action) {
4031     auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4032       CGF.EmitStmt(
4033           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
4034     };
4035 
4036     // Codegen that selects wheather to generate the privatization code or not.
4037     auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
4038                           &InnermostCodeGen](CodeGenFunction &CGF,
4039                                              PrePostActionTy &Action) {
4040       RegionCodeGenTy RCG(InnermostCodeGen);
4041       PrivatizeDevicePointers = false;
4042 
4043       // Call the pre-action to change the status of PrivatizeDevicePointers if
4044       // needed.
4045       Action.Enter(CGF);
4046 
4047       if (PrivatizeDevicePointers) {
4048         OMPPrivateScope PrivateScope(CGF);
4049         // Emit all instances of the use_device_ptr clause.
4050         for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
4051           CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
4052                                         Info.CaptureDeviceAddrMap);
4053         (void)PrivateScope.Privatize();
4054         RCG(CGF);
4055       } else
4056         RCG(CGF);
4057     };
4058 
4059     // Forward the provided action to the privatization codegen.
4060     RegionCodeGenTy PrivRCG(PrivCodeGen);
4061     PrivRCG.setAction(Action);
4062 
4063     // Notwithstanding the body of the region is emitted as inlined directive,
4064     // we don't use an inline scope as changes in the references inside the
4065     // region are expected to be visible outside, so we do not privative them.
4066     OMPLexicalScope Scope(CGF, S);
4067     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
4068                                                     PrivRCG);
4069   };
4070 
4071   RegionCodeGenTy RCG(CodeGen);
4072 
4073   // If we don't have target devices, don't bother emitting the data mapping
4074   // code.
4075   if (CGM.getLangOpts().OMPTargetTriples.empty()) {
4076     RCG(*this);
4077     return;
4078   }
4079 
4080   // Check if we have any if clause associated with the directive.
4081   const Expr *IfCond = nullptr;
4082   if (auto *C = S.getSingleClause<OMPIfClause>())
4083     IfCond = C->getCondition();
4084 
4085   // Check if we have any device clause associated with the directive.
4086   const Expr *Device = nullptr;
4087   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4088     Device = C->getDevice();
4089 
4090   // Set the action to signal privatization of device pointers.
4091   RCG.setAction(PrivAction);
4092 
4093   // Emit region code.
4094   CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
4095                                              Info);
4096 }
4097 
4098 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
4099     const OMPTargetEnterDataDirective &S) {
4100   // If we don't have target devices, don't bother emitting the data mapping
4101   // code.
4102   if (CGM.getLangOpts().OMPTargetTriples.empty())
4103     return;
4104 
4105   // Check if we have any if clause associated with the directive.
4106   const Expr *IfCond = nullptr;
4107   if (auto *C = S.getSingleClause<OMPIfClause>())
4108     IfCond = C->getCondition();
4109 
4110   // Check if we have any device clause associated with the directive.
4111   const Expr *Device = nullptr;
4112   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4113     Device = C->getDevice();
4114 
4115   auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF,
4116                                         PrePostActionTy &) {
4117     CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond,
4118                                                             Device);
4119   };
4120   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
4121   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_enter_data,
4122                                               CodeGen);
4123 }
4124 
4125 void CodeGenFunction::EmitOMPTargetExitDataDirective(
4126     const OMPTargetExitDataDirective &S) {
4127   // If we don't have target devices, don't bother emitting the data mapping
4128   // code.
4129   if (CGM.getLangOpts().OMPTargetTriples.empty())
4130     return;
4131 
4132   // Check if we have any if clause associated with the directive.
4133   const Expr *IfCond = nullptr;
4134   if (auto *C = S.getSingleClause<OMPIfClause>())
4135     IfCond = C->getCondition();
4136 
4137   // Check if we have any device clause associated with the directive.
4138   const Expr *Device = nullptr;
4139   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4140     Device = C->getDevice();
4141 
4142   auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF,
4143                                         PrePostActionTy &) {
4144     CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond,
4145                                                             Device);
4146   };
4147   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
4148   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_exit_data,
4149                                               CodeGen);
4150 }
4151 
4152 static void emitTargetParallelRegion(CodeGenFunction &CGF,
4153                                      const OMPTargetParallelDirective &S,
4154                                      PrePostActionTy &Action) {
4155   // Get the captured statement associated with the 'parallel' region.
4156   auto *CS = S.getCapturedStmt(OMPD_parallel);
4157   Action.Enter(CGF);
4158   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
4159     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4160     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4161     CGF.EmitOMPPrivateClause(S, PrivateScope);
4162     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4163     (void)PrivateScope.Privatize();
4164     // TODO: Add support for clauses.
4165     CGF.EmitStmt(CS->getCapturedStmt());
4166     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4167   };
4168   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
4169                                  emitEmptyBoundParameters);
4170   emitPostUpdateForReductionClause(
4171       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4172 }
4173 
4174 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4175     CodeGenModule &CGM, StringRef ParentName,
4176     const OMPTargetParallelDirective &S) {
4177   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4178     emitTargetParallelRegion(CGF, S, Action);
4179   };
4180   llvm::Function *Fn;
4181   llvm::Constant *Addr;
4182   // Emit target region as a standalone region.
4183   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4184       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4185   assert(Fn && Addr && "Target device function emission failed.");
4186 }
4187 
4188 void CodeGenFunction::EmitOMPTargetParallelDirective(
4189     const OMPTargetParallelDirective &S) {
4190   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4191     emitTargetParallelRegion(CGF, S, Action);
4192   };
4193   emitCommonOMPTargetDirective(*this, S, CodeGen);
4194 }
4195 
4196 static void emitTargetParallelForRegion(CodeGenFunction &CGF,
4197                                         const OMPTargetParallelForDirective &S,
4198                                         PrePostActionTy &Action) {
4199   Action.Enter(CGF);
4200   // Emit directive as a combined directive that consists of two implicit
4201   // directives: 'parallel' with 'for' directive.
4202   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4203     CodeGenFunction::OMPCancelStackRAII CancelRegion(
4204         CGF, OMPD_target_parallel_for, S.hasCancel());
4205     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4206                                emitDispatchForLoopBounds);
4207   };
4208   emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
4209                                  emitEmptyBoundParameters);
4210 }
4211 
4212 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
4213     CodeGenModule &CGM, StringRef ParentName,
4214     const OMPTargetParallelForDirective &S) {
4215   // Emit SPMD target parallel for region as a standalone region.
4216   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4217     emitTargetParallelForRegion(CGF, S, Action);
4218   };
4219   llvm::Function *Fn;
4220   llvm::Constant *Addr;
4221   // Emit target region as a standalone region.
4222   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4223       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4224   assert(Fn && Addr && "Target device function emission failed.");
4225 }
4226 
4227 void CodeGenFunction::EmitOMPTargetParallelForDirective(
4228     const OMPTargetParallelForDirective &S) {
4229   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4230     emitTargetParallelForRegion(CGF, S, Action);
4231   };
4232   emitCommonOMPTargetDirective(*this, S, CodeGen);
4233 }
4234 
4235 static void
4236 emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
4237                                 const OMPTargetParallelForSimdDirective &S,
4238                                 PrePostActionTy &Action) {
4239   Action.Enter(CGF);
4240   // Emit directive as a combined directive that consists of two implicit
4241   // directives: 'parallel' with 'for' directive.
4242   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4243     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4244                                emitDispatchForLoopBounds);
4245   };
4246   emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
4247                                  emitEmptyBoundParameters);
4248 }
4249 
4250 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
4251     CodeGenModule &CGM, StringRef ParentName,
4252     const OMPTargetParallelForSimdDirective &S) {
4253   // Emit SPMD target parallel for region as a standalone region.
4254   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4255     emitTargetParallelForSimdRegion(CGF, S, Action);
4256   };
4257   llvm::Function *Fn;
4258   llvm::Constant *Addr;
4259   // Emit target region as a standalone region.
4260   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4261       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4262   assert(Fn && Addr && "Target device function emission failed.");
4263 }
4264 
4265 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
4266     const OMPTargetParallelForSimdDirective &S) {
4267   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4268     emitTargetParallelForSimdRegion(CGF, S, Action);
4269   };
4270   emitCommonOMPTargetDirective(*this, S, CodeGen);
4271 }
4272 
4273 /// Emit a helper variable and return corresponding lvalue.
4274 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
4275                      const ImplicitParamDecl *PVD,
4276                      CodeGenFunction::OMPPrivateScope &Privates) {
4277   auto *VDecl = cast<VarDecl>(Helper->getDecl());
4278   Privates.addPrivate(
4279       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
4280 }
4281 
/// Emit code for a taskloop-based loop directive \p S (asserted via
/// isOpenMPTaskLoopDirective). Collects the 'if', 'nogroup', 'grainsize' and
/// 'num_tasks' clause information into an OMPTaskDataTy, builds a generator
/// for the loop body (BodyGen) and one for the runtime taskloop call
/// (TaskGen), and hands both to EmitOMPTaskBasedDirective. Unless 'nogroup'
/// is present, that emission is wrapped in a taskgroup region.
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Use the condition of the first 'if' clause whose name modifier is either
  // OMPD_unknown or OMPD_taskloop; IfCond stays null if there is none.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop
  // Data.Schedule pairs a flag with a value: the flag is false for
  // 'grainsize' and true for 'num_tasks'; the value is the emitted clause
  // expression. With neither clause present Data.Schedule is left untouched.
  if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  // Generator for the loop body; passed to EmitOMPTaskBasedDirective below.
  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // Stays null when the precondition folds to a constant, in which case no
    // explicit branch is emitted.
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    // Offsets, counted from the first parameter of the captured declaration,
    // of the parameters that the loop helper variables are mapped to below.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Emit the loop itself; the second callback (post-increment hook) is
    // intentionally empty.
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    // The flag is loaded from the LastIter parameter of the captured
    // declaration.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  // Generator for the runtime taskloop call: emitTaskLoopCall is issued
  // inside an inlined OMPD_taskloop directive, under a fresh OMPLoopScope.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  // With 'nogroup' the task-based directive is emitted directly; otherwise
  // the same emission happens inside a taskgroup region.
  if (Data.Nogroup)
    EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
  else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
        },
        S.getLocStart());
  }
}
4420 
4421 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
4422   EmitOMPTaskLoopBasedDirective(S);
4423 }
4424 
4425 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
4426     const OMPTaskLoopSimdDirective &S) {
4427   EmitOMPTaskLoopBasedDirective(S);
4428 }
4429 
4430 // Generate the instructions for '#pragma omp target update' directive.
4431 void CodeGenFunction::EmitOMPTargetUpdateDirective(
4432     const OMPTargetUpdateDirective &S) {
4433   // If we don't have target devices, don't bother emitting the data mapping
4434   // code.
4435   if (CGM.getLangOpts().OMPTargetTriples.empty())
4436     return;
4437 
4438   // Check if we have any if clause associated with the directive.
4439   const Expr *IfCond = nullptr;
4440   if (auto *C = S.getSingleClause<OMPIfClause>())
4441     IfCond = C->getCondition();
4442 
4443   // Check if we have any device clause associated with the directive.
4444   const Expr *Device = nullptr;
4445   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4446     Device = C->getDevice();
4447 
4448   auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF,
4449                                         PrePostActionTy &) {
4450     CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond,
4451                                                             Device);
4452   };
4453   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
4454   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_update,
4455                                               CodeGen);
4456 }
4457